diff --git a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out index fde55a2f49..7df80c07b0 100644 --- a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out +++ b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out @@ -96,16 +96,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9405 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -123,17 +123,21 @@ STAGE PLANS: name: default.src_x2 Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out index ae377077c6..5f568c24fc 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out @@ -81,29 +81,33 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out index 65582e7c7b..b55d5b1249 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out @@ -77,29 +77,33 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: smallint), _col1 (type: smallint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out index 7242473aff..62a1259769 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out @@ -77,29 +77,33 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out index bf289a9fde..58cb225ddd 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out @@ -88,17 +88,17 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out index 92f2eaded1..ed8aa073de 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out @@ -81,29 +81,33 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out index 1adcf67510..622e2b464f 100644 --- a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out +++ b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out @@ -96,16 +96,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9405 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -123,17 +123,21 @@ STAGE PLANS: name: default.src_x2 Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator diff --git a/itests/hive-blobstore/src/test/results/clientpositive/explain.q.out b/itests/hive-blobstore/src/test/results/clientpositive/explain.q.out index 1cd55fa9c6..ef48bc963b 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/explain.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/explain.q.out @@ -88,17 +88,17 @@ STAGE PLANS: outputColumnNames: cnt Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(cnt, 'hll') + aggregations: min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out index ce071cba37..311a7e8532 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out @@ -110,19 +110,19 @@ STAGE PLANS: outputColumnNames: id Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id, 'hll') + aggregations: min(id), max(id), count(CASE WHEN (id is null) THEN (1) ELSE (null) END), compute_bit_vector(id, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -164,34 +164,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0 - columns.types struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out index f65bf22eda..3f88fac19c 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out @@ -118,19 +118,19 @@ STAGE PLANS: outputColumnNames: id Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id, 'hll') + aggregations: min(id), max(id), count(CASE WHEN (id is null) THEN (1) ELSE (null) END), compute_bit_vector(id, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -172,34 +172,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0 - columns.types struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator diff --git a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out index 8bfc66795e..403401b487 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out @@ -215,29 +215,29 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0 - columns.types struct + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:bigint:bigint:bigint:binary escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -471,29 +471,29 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0 - columns.types struct + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:bigint:bigint:bigint:binary escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 76e460ed7a..4ca7e2b9c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -467,6 +467,8 @@ system.registerGenericUDAF("context_ngrams", new GenericUDAFContextNGrams()); system.registerGenericUDAF("compute_stats", new GenericUDAFComputeStats()); + system.registerGenericUDF("ndv_compute_bit_vector", GenericUDFNDVComputeBitVector.class); + system.registerGenericUDAF("compute_bit_vector", new GenericUDAFComputeBitVector()); system.registerGenericUDAF("bloom_filter", new GenericUDAFBloomFilter()); system.registerGenericUDAF("approx_distinct", new GenericUDAFApproximateDistinct()); system.registerUDAF("percentile", UDAFPercentile.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index d25cadf7ea..95d7aa40a9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -130,8 +130,10 @@ public void insertTableValuesAnalyzePipeline() throws SemanticException { partSpec.put(partKey, null); } } + List colNames = Utilities.getColumnNamesFromFieldSchema(tbl.getCols()); + List colTypes = ColumnStatsSemanticAnalyzer.getColumnTypes(tbl, colNames); String command = ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, Utilities.getColumnNamesFromFieldSchema(tbl.getCols()), conf, partSpec, isPartitionStats, true); + tbl, colNames, colTypes, conf, partSpec, isPartitionStats, true); insertAnalyzePipeline(command, true); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 2787b47b2e..8ac3e90743 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.hive.ql.metadata.HiveUtils.unparseIdentifier; +import com.google.common.base.Preconditions; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -28,7 +29,6 @@ import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.VariableSubstitution; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; @@ -40,7 +40,11 @@ import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsField; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -74,17 +78,6 @@ public ColumnStatsSemanticAnalyzer(QueryState queryState) throws SemanticExcepti super(queryState); } - public static String getQuote(HiveConf conf) { - String qIdSupport = conf.getVar(ConfVars.HIVE_QUOTEDID_SUPPORT); - if ("column".equals(qIdSupport)) { - return "`"; - } else if ("standard".equals(qIdSupport)) { - return "\""; - } else { - return ""; - } - } - private boolean shouldRewrite(ASTNode tree) { boolean rwt = false; if (tree.getChildCount() > 1) { @@ -204,7 +197,7 @@ private static String getColTypeOf(Table tbl, String partKey) throws SemanticExc throw new SemanticException("Unknown partition key : " + partKey); } - private static List getColumnTypes(Table tbl, List colNames) { + protected static List getColumnTypes(Table tbl, List colNames) { List colTypes = new ArrayList(); List cols = tbl.getCols(); List copyColNames = new ArrayList<>(colNames); @@ -227,15 +220,16 @@ private static String getColTypeOf(Table tbl, String partKey) throws SemanticExc return colTypes; } - private String genRewrittenQuery(List colNames, HiveConf conf, Map partSpec, - boolean isPartitionStats) throws SemanticException { - String rewritten = genRewrittenQuery(tbl, colNames, conf, partSpec, isPartitionStats, false); + private String genRewrittenQuery(List colNames, List colTypes, HiveConf conf, + Map partSpec, boolean isPartitionStats) throws SemanticException { + String rewritten = genRewrittenQuery(tbl, colNames, colTypes, conf, partSpec, isPartitionStats, false); isRewritten = true; return rewritten; } - public static String genRewrittenQuery(Table tbl, List colNames, HiveConf conf, Map partSpec, - boolean isPartitionStats, boolean useTableValues) throws SemanticException{ + public static String genRewrittenQuery(Table tbl, List colNames, List colTypes, + HiveConf conf, Map partSpec, boolean isPartitionStats, + boolean useTableValues) throws SemanticException{ StringBuilder rewrittenQueryBuilder = new StringBuilder("select "); StringBuilder columnNamesBuilder = new StringBuilder(); @@ -246,26 +240,15 @@ public static String genRewrittenQuery(Table tbl, List colNames, HiveCon columnNamesBuilder.append(" , "); columnDummyValuesBuilder.append(" , "); } - String func = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ALGO).toLowerCase(); - rewrittenQueryBuilder.append("compute_stats("); + final String columnName = unparseIdentifier(colNames.get(i), conf); - rewrittenQueryBuilder.append(columnName); - rewrittenQueryBuilder.append(", '" + func + "'"); - if ("fm".equals(func)) { - int numBitVectors = 0; - try { - numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); - } catch (Exception e) { - throw new SemanticException(e.getMessage()); - } - rewrittenQueryBuilder.append(", " + numBitVectors); - } - rewrittenQueryBuilder.append(')'); + final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colTypes.get(i)); + genComputeStats(rewrittenQueryBuilder, conf, i, columnName, typeInfo); columnNamesBuilder.append(unparseIdentifier(columnName, conf)); columnDummyValuesBuilder.append( - "cast(null as " + TypeInfoUtils.getTypeInfoFromTypeString(tbl.getCols().get(i).getType()).toString() + ")"); + "cast(null as " + typeInfo.toString() + ")"); } if (isPartitionStats) { @@ -304,15 +287,199 @@ public static String genRewrittenQuery(Table tbl, List colNames, HiveCon } String rewrittenQuery = rewrittenQueryBuilder.toString(); - rewrittenQuery = new VariableSubstitution(new HiveVariableSource() { - @Override - public Map getHiveVariable() { - return SessionState.get().getHiveVariables(); - } - }).substitute(conf, rewrittenQuery); + rewrittenQuery = new VariableSubstitution( + () -> SessionState.get().getHiveVariables()).substitute(conf, rewrittenQuery); return rewrittenQuery; } + private static void genComputeStats(StringBuilder rewrittenQueryBuilder, HiveConf conf, + int pos, String columnName, TypeInfo typeInfo) throws SemanticException { + Preconditions.checkArgument(typeInfo.getCategory() == Category.PRIMITIVE); + ColumnStatsType columnStatsType = + ColumnStatsType.getColumnStatsType((PrimitiveTypeInfo) typeInfo); + // The first column is always the type + // The rest of columns will depend on the type itself + int size = columnStatsType.getColumnStats().size() - 1; + for (int i = 0; i < size; i++) { + ColumnStatsField columnStatsField = columnStatsType.getColumnStats().get(i); + appendStatsField(rewrittenQueryBuilder, conf, columnStatsField, columnStatsType, + columnName, pos); + rewrittenQueryBuilder.append(", "); + } + ColumnStatsField columnStatsField = columnStatsType.getColumnStats().get(size); + appendStatsField(rewrittenQueryBuilder, conf, columnStatsField, columnStatsType, + columnName, pos); + } + + private static void appendStatsField(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsField columnStatsField, ColumnStatsType columnStatsType, + String columnName, int pos) throws SemanticException { + switch (columnStatsField) { + case COLUMN_TYPE: + appendColumnType(rewrittenQueryBuilder, conf, columnStatsType, pos); + break; + case COUNT_TRUES: + appendCountTrues(rewrittenQueryBuilder, conf, columnName, pos); + break; + case COUNT_FALSES: + appendCountFalses(rewrittenQueryBuilder, conf, columnName, pos); + break; + case COUNT_NULLS: + appendCountNulls(rewrittenQueryBuilder, conf, columnName, pos); + break; + case MIN: + appendMin(rewrittenQueryBuilder, conf, columnStatsType, columnName, pos); + break; + case MAX: + appendMax(rewrittenQueryBuilder, conf, columnStatsType, columnName, pos); + break; + case NDV: + appendNDV(rewrittenQueryBuilder, conf, columnName, pos); + break; + case BITVECTOR: + appendBitVector(rewrittenQueryBuilder, conf, columnName, pos); + break; + case MAX_LENGTH: + appendMaxLength(rewrittenQueryBuilder, conf, columnName, pos); + break; + case AVG_LENGTH: + appendAvgLength(rewrittenQueryBuilder, conf, columnName, pos); + break; + default: + throw new SemanticException("Not supported field " + columnStatsField); + } + } + + private static void appendColumnType(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsType columnStatsType, int pos) { + rewrittenQueryBuilder.append("'") + .append(columnStatsType.toString()) + .append("' AS ") + .append(unparseIdentifier(ColumnStatsField.COLUMN_TYPE.getFieldName() + pos, conf)); + } + + private static void appendMin(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsType columnStatsType, String columnName, int pos) { + switch (columnStatsType) { + case LONG: + rewrittenQueryBuilder.append("CAST(min(") + .append(columnName) + .append(") AS bigint) AS "); + break; + case DOUBLE: + rewrittenQueryBuilder.append("CAST(min(") + .append(columnName) + .append(") AS double) AS "); + break; + default: + rewrittenQueryBuilder.append("min(") + .append(columnName) + .append(") AS "); + break; + } + rewrittenQueryBuilder.append( + unparseIdentifier(ColumnStatsField.MIN.getFieldName() + pos, conf)); + } + + private static void appendMax(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsType columnStatsType, String columnName, int pos) { + switch (columnStatsType) { + case LONG: + rewrittenQueryBuilder.append("CAST(max(") + .append(columnName) + .append(") AS bigint) AS "); + break; + case DOUBLE: + rewrittenQueryBuilder.append("CAST(max(") + .append(columnName) + .append(") AS double) AS "); + break; + default: + rewrittenQueryBuilder.append("max(") + .append(columnName) + .append(") AS "); + break; + } + rewrittenQueryBuilder.append( + unparseIdentifier(ColumnStatsField.MAX.getFieldName() + pos, conf)); + } + + private static void appendMaxLength(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(COALESCE(max(LENGTH(") + .append(columnName) + .append(")), 0) AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.MAX_LENGTH.getFieldName() + pos, conf)); + } + + private static void appendAvgLength(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(COALESCE(avg(COALESCE(LENGTH(") + .append(columnName) + .append("), 0)), 0) AS double) AS ") + .append(unparseIdentifier(ColumnStatsField.AVG_LENGTH.getFieldName() + pos, conf)); + } + + private static void appendCountNulls(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(count(CASE WHEN ") + .append(columnName) + .append(" IS NULL THEN 1 ELSE null END) AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.COUNT_NULLS.getFieldName() + pos, conf)); + } + + private static void appendNDV(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) throws SemanticException { + rewrittenQueryBuilder.append("COALESCE(NDV_COMPUTE_BIT_VECTOR("); + appendBitVector(rewrittenQueryBuilder, conf, columnName); + rewrittenQueryBuilder.append("), 0) AS ") + .append(unparseIdentifier(ColumnStatsField.NDV.getFieldName() + pos, conf)); + } + + private static void appendBitVector(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) throws SemanticException { + appendBitVector(rewrittenQueryBuilder, conf, columnName); + rewrittenQueryBuilder.append(" AS ") + .append(unparseIdentifier(ColumnStatsField.BITVECTOR.getFieldName() + pos, conf)); + } + + private static void appendBitVector(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName) throws SemanticException { + String func = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ALGO).toLowerCase(); + rewrittenQueryBuilder.append("compute_bit_vector(") + .append(columnName) + .append(", '") + .append(func) + .append("'"); + if ("fm".equals(func)) { + int numBitVectors; + try { + numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); + } catch (Exception e) { + throw new SemanticException(e.getMessage()); + } + rewrittenQueryBuilder.append(", ") + .append(numBitVectors); + } + rewrittenQueryBuilder.append(")"); + } + + private static void appendCountTrues(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(count(CASE WHEN ") + .append(columnName) + .append(" IS TRUE THEN 1 ELSE null END) AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.COUNT_TRUES.getFieldName() + pos, conf)); + } + + private static void appendCountFalses(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(count(CASE WHEN ") + .append(columnName) + .append(" IS FALSE THEN 1 ELSE null END) AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.COUNT_FALSES.getFieldName() + pos, conf)); + } + private ASTNode genRewrittenTree(String rewrittenQuery) throws SemanticException { // Parse the rewritten query string try { @@ -398,7 +565,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { isTableLevel = true; } colType = getColumnTypes(tbl, colNames); - rewrittenQuery = genRewrittenQuery(colNames, conf, partSpec, isPartitionStats); + rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partSpec, isPartitionStats); rewrittenTree = genRewrittenTree(rewrittenQuery); } else { // Not an analyze table column compute statistics statement - don't do any rewrites @@ -467,7 +634,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) isTableLevel = !isPartitionStats; - rewrittenQuery = genRewrittenQuery(colNames, conf, partSpec, isPartitionStats); + rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partSpec, isPartitionStats); rewrittenTree = genRewrittenTree(rewrittenQuery); return rewrittenTree; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java index 1a339633d4..47fc962e90 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.stats; +import com.google.common.collect.ImmutableList; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -41,6 +42,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.session.SessionState; @@ -49,11 +51,14 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + public class ColStatsProcessor implements IStatsProcessor { private static transient final Logger LOG = LoggerFactory.getLogger(ColStatsProcessor.class); @@ -87,10 +92,8 @@ public int process(Hive db, Table tbl) throws Exception { return persistColumnStats(db, tbl); } - private List constructColumnStatsFromPackedRows(Table tbl1) throws HiveException, MetaException, IOException { - - Table tbl = tbl1; - + private List constructColumnStatsFromPackedRows(Table tbl) + throws HiveException, MetaException, IOException { String partName = null; List colName = colStatDesc.getColName(); List colType = colStatDesc.getColType(); @@ -103,22 +106,23 @@ public int process(Hive db, Table tbl) throws Exception { throw new HiveException("Unexpected object type encountered while unpacking row"); } - List statsObjs = new ArrayList(); + List statsObjs = new ArrayList<>(); StructObjectInspector soi = (StructObjectInspector) packedRow.oi; List fields = soi.getAllStructFieldRefs(); List list = soi.getStructFieldsDataAsList(packedRow.o); List partColSchema = tbl.getPartCols(); + // Partition columns are appended at end, we only care about stats column - int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size(); - assert list != null; - for (int i = 0; i < numOfStatCols; i++) { - StructField structField = fields.get(i); + int pos = 0; + for (int i = 0; i < colName.size(); i++) { String columnName = colName.get(i); String columnType = colType.get(i); - Object values = list.get(i); + PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(columnType); + List columnStatsFields = ColumnStatsType.getColumnStats(typeInfo); try { - ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveStruct(columnName, columnType, structField, values); + ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveColumnStatistics( + columnName, columnType, columnStatsFields, pos, soi, list); statsObjs.add(statObj); } catch (Exception e) { if (isStatsReliable) { @@ -127,15 +131,16 @@ public int process(Hive db, Table tbl) throws Exception { LOG.debug("Because {} is infinite or NaN, we skip stats.", columnName, e); } } + pos += columnStatsFields.size(); } if (!statsObjs.isEmpty()) { - if (!isTblLevel) { - List partVals = new ArrayList(); + List partVals = new ArrayList<>(); // Iterate over partition columns to figure out partition name - for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) { - Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(list.get(i)); + for (int i = pos; i < pos + partColSchema.size(); i++) { + Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()) + .getPrimitiveJavaObject(list.get(i)); partVals.add(partVal == null ? // could be null for default partition this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString()); } @@ -196,4 +201,148 @@ public int persistColumnStats(Hive db, Table tbl) throws HiveException, MetaExce public void setDpPartSpecs(Collection dpPartSpecs) { } + /** + * Enumeration of column stats fields that can currently + * be computed. Each one has a field name associated. + */ + public enum ColumnStatsField { + COLUMN_TYPE("columntype"), + COUNT_TRUES("counttrues"), + COUNT_FALSES("countfalses"), + COUNT_NULLS("countnulls"), + MIN("min"), + MAX("max"), + NDV("numdistinctvalues"), + BITVECTOR("ndvbitvector"), + MAX_LENGTH("maxlength"), + AVG_LENGTH("avglength"); + + private final String fieldName; + + ColumnStatsField(String fieldName) { + this.fieldName = fieldName; + } + + public String getFieldName() { + return fieldName; + } + } + + /** + * Enumeration of column stats type. Each Hive primitive type maps into a single + * column stats type, e.g., byte, short, int, and bigint types map into long + * column type. Each column stats type has _n_ column stats fields associated + * with it. + */ + public enum ColumnStatsType { + BOOLEAN( + ImmutableList.of( + ColumnStatsField.COLUMN_TYPE, + ColumnStatsField.COUNT_TRUES, + ColumnStatsField.COUNT_FALSES, + ColumnStatsField.COUNT_NULLS)), + LONG( + ImmutableList.of( + ColumnStatsField.COLUMN_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + DOUBLE( + ImmutableList.of( + ColumnStatsField.COLUMN_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + STRING( + ImmutableList.of( + ColumnStatsField.COLUMN_TYPE, + ColumnStatsField.MAX_LENGTH, + ColumnStatsField.AVG_LENGTH, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + BINARY( + ImmutableList.of( + ColumnStatsField.COLUMN_TYPE, + ColumnStatsField.MAX_LENGTH, + ColumnStatsField.AVG_LENGTH, + ColumnStatsField.COUNT_NULLS)), + DECIMAL( + ImmutableList.of( + ColumnStatsField.COLUMN_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + DATE( + ImmutableList.of( + ColumnStatsField.COLUMN_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + TIMESTAMP( + ImmutableList.of( + ColumnStatsField.COLUMN_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)); + + + private final List columnStats; + + ColumnStatsType(List columnStats) { + this.columnStats = columnStats; + } + + public List getColumnStats() { + return columnStats; + } + + public static ColumnStatsType getColumnStatsType(PrimitiveTypeInfo typeInfo) + throws SemanticException { + switch (typeInfo.getPrimitiveCategory()) { + case BOOLEAN: + return BOOLEAN; + case BYTE: + case SHORT: + case INT: + case LONG: + case TIMESTAMPLOCALTZ: + return LONG; + case FLOAT: + case DOUBLE: + return DOUBLE; + case DECIMAL: + return DECIMAL; + case DATE: + return DATE; + case TIMESTAMP: + return TIMESTAMP; + case STRING: + case CHAR: + case VARCHAR: + return STRING; + case BINARY: + return BINARY; + default: + throw new SemanticException("Not supported type " + + typeInfo.getTypeName() + " for statistics computation"); + } + } + + public static List getColumnStats(PrimitiveTypeInfo typeInfo) + throws SemanticException { + return getColumnStatsType(typeInfo).getColumnStats(); + } + + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java index e6926d3d18..50ce0dd4a7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java @@ -37,6 +37,8 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsField; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -50,40 +52,49 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + public class ColumnStatisticsObjTranslator { - private static transient final Logger LOG = LoggerFactory - .getLogger(ColumnStatisticsObjTranslator.class); + public static ColumnStatisticsObj readHiveColumnStatistics(String columnName, String columnType, + List columnStatsFields, int start, StructObjectInspector soi, List list) + throws HiveException { + List fields = soi.getAllStructFieldRefs(); - public static ColumnStatisticsObj readHiveStruct(String columnName, String columnType, StructField structField, Object values) - throws HiveException - { - // Get the field objectInspector, fieldName and the field object. - ObjectInspector foi = structField.getFieldObjectInspector(); - Object f = values; - String fieldName = structField.getFieldName(); ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); statsObj.setColName(columnName); statsObj.setColType(columnType); - try { - unpackStructObject(foi, f, fieldName, statsObj); - return statsObj; - } catch (Exception e) { - throw new HiveException("error calculating stats for column:" + structField.getFieldName(), e); + + int end = start + columnStatsFields.size(); + for (int i = start; i < end; i++) { + // Get the field objectInspector, fieldName and the field object. + ObjectInspector foi = fields.get(i).getFieldObjectInspector(); + Object f = (list == null ? null : list.get(i)); + try { + unpackPrimitiveObject(foi, f, columnStatsFields.get(i - start), statsObj); + } catch (Exception e) { + throw new HiveException("Error calculating statistics for column:" + columnName, e); + } } + + return statsObj; } - private static void unpackBooleanStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { + private static void unpackBooleanStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { long v = ((LongObjectInspector) oi).get(o); - if (fName.equals("counttrues")) { + switch (csf) { + case COUNT_TRUES: statsObj.getStatsData().getBooleanStats().setNumTrues(v); - } else if (fName.equals("countfalses")) { + break; + case COUNT_FALSES: statsObj.getStatsData().getBooleanStats().setNumFalses(v); - } else if (fName.equals("countnulls")) { + break; + case COUNT_NULLS: statsObj.getStatsData().getBooleanStats().setNumNulls(v); + break; + default: + throw new RuntimeException("Unsupported column stat for BOOLEAN : " + csf); } } @@ -91,51 +102,67 @@ private static void unpackBooleanStats(ObjectInspector oi, Object o, String fNam static class UnsupportedDoubleException extends Exception { } - private static void unpackDoubleStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDoubleStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDoubleStats().setNumDVs(v); - } else if (fName.equals("max")) { - double d = ((DoubleObjectInspector) oi).get(o); - if (Double.isInfinite(d) || Double.isNaN(d)) { + private static void unpackDoubleStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDoubleStats().setNumNulls(cn); + break; + case MIN: + double min = ((DoubleObjectInspector) oi).get(o); + if (Double.isInfinite(min) || Double.isNaN(min)) { throw new UnsupportedDoubleException(); } - statsObj.getStatsData().getDoubleStats().setHighValue(d); - } else if (fName.equals("min")) { - double d = ((DoubleObjectInspector) oi).get(o); - if (Double.isInfinite(d) || Double.isNaN(d)) { + statsObj.getStatsData().getDoubleStats().setLowValue(min); + break; + case MAX: + double max = ((DoubleObjectInspector) oi).get(o); + if (Double.isInfinite(max) || Double.isNaN(max)) { throw new UnsupportedDoubleException(); } - statsObj.getStatsData().getDoubleStats().setLowValue(d); - } else if (fName.equals("ndvbitvector")) { + statsObj.getStatsData().getDoubleStats().setHighValue(max); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDoubleStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDoubleStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for DOUBLE : " + csf); } } - private static void unpackDecimalStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDecimalStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDecimalStats().setNumDVs(v); - } else if (fName.equals("max")) { - HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(d)); - } else if (fName.equals("min")) { - HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d)); - } else if (fName.equals("ndvbitvector")) { + private static void unpackDecimalStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumNulls(cn); + break; + case MIN: + HiveDecimal min = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(min)); + break; + case MAX: + HiveDecimal max = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(max)); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDecimalStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for DECIMAL : " + csf); } } @@ -143,141 +170,182 @@ private static Decimal convertToThriftDecimal(HiveDecimal d) { return DecimalUtils.getDecimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()); } - private static void unpackLongStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setNumDVs(v); - } else if (fName.equals("max")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setHighValue(v); - } else if (fName.equals("min")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setLowValue(v); - } else if (fName.equals("ndvbitvector")) { + private static void unpackLongStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setNumNulls(cn); + break; + case MIN: + long min = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setLowValue(min); + break; + case MAX: + long max = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setHighValue(max); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getLongStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for LONG : " + csf); } } - private static void unpackStringStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setNumDVs(v); - } else if (fName.equals("avglength")) { - double d = ((DoubleObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setAvgColLen(d); - } else if (fName.equals("maxlength")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setMaxColLen(v); - } else if (fName.equals("ndvbitvector")) { + private static void unpackStringStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setNumNulls(cn); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getStringStats().setBitVectors(buf); - ; + break; + case MAX_LENGTH: + long max = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setMaxColLen(max); + break; + case AVG_LENGTH: + double avg = ((DoubleObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setAvgColLen(avg); + break; + default: + throw new RuntimeException("Unsupported column stat for STRING : " + csf); } } - private static void unpackBinaryStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getBinaryStats().setNumNulls(v); - } else if (fName.equals("avglength")) { - double d = ((DoubleObjectInspector) oi).get(o); - statsObj.getStatsData().getBinaryStats().setAvgColLen(d); - } else if (fName.equals("maxlength")) { + private static void unpackBinaryStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getBinaryStats().setNumNulls(cn); + break; + case AVG_LENGTH: + double avg = ((DoubleObjectInspector) oi).get(o); + statsObj.getStatsData().getBinaryStats().setAvgColLen(avg); + break; + case MAX_LENGTH: long v = ((LongObjectInspector) oi).get(o); statsObj.getStatsData().getBinaryStats().setMaxColLen(v); + break; + default: + throw new RuntimeException("Unsupported column stat for BINARY : " + csf); } } - private static void unpackDateStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDateStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDateStats().setNumDVs(v); - } else if (fName.equals("max")) { - DateWritableV2 v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setHighValue(new Date(v.getDays())); - } else if (fName.equals("min")) { - DateWritableV2 v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays())); - } else if (fName.equals("ndvbitvector")) { + private static void unpackDateStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDateStats().setNumNulls(cn); + break; + case MIN: + DateWritableV2 min = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getDateStats().setLowValue(new Date(min.getDays())); + break; + case MAX: + DateWritableV2 max = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getDateStats().setHighValue(new Date(max.getDays())); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDateStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDateStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for DATE : " + csf); } } - private static void unpackTimestampStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getTimestampStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getTimestampStats().setNumDVs(v); - } else if (fName.equals("max")) { - TimestampWritableV2 v = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getTimestampStats().setHighValue(new Timestamp(v.getSeconds())); - } else if (fName.equals("min")) { - TimestampWritableV2 v = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getTimestampStats().setLowValue(new Timestamp(v.getSeconds())); - } else if (fName.equals("ndvbitvector")) { + private static void unpackTimestampStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getTimestampStats().setNumNulls(cn); + break; + case MIN: + TimestampWritableV2 min = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getTimestampStats().setLowValue(new Timestamp(min.getSeconds())); + break; + case MAX: + TimestampWritableV2 max = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getTimestampStats().setHighValue(new Timestamp(max.getSeconds())); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getTimestampStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getTimestampStats().setBitVectors(buf); + break; + default: + throw new RuntimeException("Unsupported column stat for TIMESTAMP : " + csf); } } - private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { + private static void unpackPrimitiveObject(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { if (o == null) { return; } // First infer the type of object - if (fieldName.equals("columntype")) { + if (csf == ColumnStatsField.COLUMN_TYPE) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); ColumnStatisticsData statsData = new ColumnStatisticsData(); - if (s.equalsIgnoreCase("long")) { + if (s.equalsIgnoreCase(ColumnStatsType.LONG.toString())) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); statsData.setLongStats(longStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("double")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.DOUBLE.toString())) { DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); statsData.setDoubleStats(doubleStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("string")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.STRING.toString())) { StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); statsData.setStringStats(stringStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("boolean")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.BOOLEAN.toString())) { BooleanColumnStatsData booleanStats = new BooleanColumnStatsData(); statsData.setBooleanStats(booleanStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("binary")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.BINARY.toString())) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); statsData.setBinaryStats(binaryStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("decimal")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.DECIMAL.toString())) { DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); statsData.setDecimalStats(decimalStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("date")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.DATE.toString())) { DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); statsData.setDateStats(dateStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("timestamp")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.TIMESTAMP.toString())) { TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); statsData.setTimestampStats(timestampStats); statsObj.setStatsData(statsData); @@ -285,44 +353,21 @@ private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String f } else { // invoke the right unpack method depending on data type of the column if (statsObj.getStatsData().isSetBooleanStats()) { - unpackBooleanStats(oi, o, fieldName, statsObj); + unpackBooleanStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetLongStats()) { - unpackLongStats(oi, o, fieldName, statsObj); + unpackLongStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetDoubleStats()) { - unpackDoubleStats(oi, o, fieldName, statsObj); + unpackDoubleStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetStringStats()) { - unpackStringStats(oi, o, fieldName, statsObj); + unpackStringStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetBinaryStats()) { - unpackBinaryStats(oi, o, fieldName, statsObj); + unpackBinaryStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetDecimalStats()) { - unpackDecimalStats(oi, o, fieldName, statsObj); + unpackDecimalStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetDateStats()) { - unpackDateStats(oi, o, fieldName, statsObj); + unpackDateStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetTimestampStats()) { - unpackTimestampStats(oi, o, fieldName, statsObj); - } - } - } - - private static void unpackStructObject(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj cStatsObj) throws UnsupportedDoubleException { - if (oi.getCategory() != ObjectInspector.Category.STRUCT) { - throw new RuntimeException("Invalid object datatype : " + oi.getCategory().toString()); - } - - StructObjectInspector soi = (StructObjectInspector) oi; - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(o); - - for (int i = 0; i < fields.size(); i++) { - // Get the field objectInspector, fieldName and the field object. - ObjectInspector foi = fields.get(i).getFieldObjectInspector(); - Object f = (list == null ? null : list.get(i)); - String fieldName = fields.get(i).getFieldName(); - - if (foi.getCategory() == ObjectInspector.Category.PRIMITIVE) { - unpackPrimitiveObject(foi, f, fieldName, cStatsObj); - } else { - unpackStructObject(foi, f, fieldName, cStatsObj); + unpackTimestampStats(oi, o, csf, statsObj); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeBitVector.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeBitVector.java new file mode 100644 index 0000000000..8bfca77b51 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeBitVector.java @@ -0,0 +1,561 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.BytesWritable; + +import static org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator; +import static org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator; + +/** + * GenericUDAFComputeBitVector. This UDAF replicates part of the functionality + * that was in GenericUDAFComputeStats previously, which is deprecated now. + * In particular, it will compute a bit vector using the algorithm provided + * as a parameter. The ndv_compute_bit_vector function can be used on top of + * it to extract an estimate of the ndv from it. + */ +@Description(name = "compute_bit_vector", + value = "_FUNC_(x) - Computes bit vector for NDV computation.") +public class GenericUDAFComputeBitVector extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { + if (parameters.length < 2 ) { + throw new UDFArgumentTypeException(parameters.length - 1, + "Exactly 2 (col + hll) or 3 (col + fm + #bitvectors) arguments are expected."); + } + + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + + ColumnStatsType cst = ColumnStatsType.getColumnStatsType(((PrimitiveTypeInfo) parameters[0])); + switch (cst) { + case LONG: + return new GenericUDAFLongStatsEvaluator(); + case DOUBLE: + return new GenericUDAFDoubleStatsEvaluator(); + case STRING: + return new GenericUDAFStringStatsEvaluator(); + case DECIMAL: + return new GenericUDAFDecimalStatsEvaluator(); + case DATE: + return new GenericUDAFDateStatsEvaluator(); + case TIMESTAMP: + return new GenericUDAFTimestampStatsEvaluator(); + default: + throw new UDFArgumentTypeException(0, + "Type argument " + parameters[0].getTypeName() + " not valid"); + } + } + + public static abstract class GenericUDAFNumericStatsEvaluator + extends GenericUDAFEvaluator { + + protected final static int MAX_BIT_VECTORS = 1024; + + /* Object Inspector corresponding to the input parameter. + */ + protected transient PrimitiveObjectInspector inputOI; + protected transient PrimitiveObjectInspector funcOI; + protected transient PrimitiveObjectInspector numVectorsOI; + + /* Object Inspector corresponding to the bitvector. + */ + protected transient BinaryObjectInspector ndvFieldOI; + + /* Partial aggregation result returned by TerminatePartial. + */ + protected transient BytesWritable partialResult; + + /* Output of final result of the aggregation. + */ + protected transient BytesWritable result; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + + // initialize input + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; + funcOI = (PrimitiveObjectInspector) parameters[1]; + if (parameters.length > 2) { + numVectorsOI = (PrimitiveObjectInspector) parameters[2]; + } + } else { + ndvFieldOI = (BinaryObjectInspector) parameters[0]; + } + + // initialize output + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { + partialResult = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } else { + result = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + NumericStatsAgg myagg = (NumericStatsAgg) agg; + + if (myagg.numDV == null) { + int numVectors = 0; + // func may be null when GBY op is closing. + // see mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainuser_3.q + // original behavior is to create FMSketch + String func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString( + parameters[1], funcOI); + if (parameters.length == 3) { + numVectors = parameters[2] == null ? 0 : PrimitiveObjectInspectorUtils.getInt( + parameters[2], numVectorsOI); + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + " is " + + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + } + } + myagg.initNDVEstimator(func, numVectors); + } + + if (parameters[0] != null) { + myagg.update(parameters[0], inputOI); + } + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial != null) { + NumericStatsAgg myagg = (NumericStatsAgg) agg; + // Merge numDistinctValue Estimators + byte[] buf = ndvFieldOI.getPrimitiveJavaObject(partial); + if (buf != null && buf.length != 0) { + if (myagg.numDV == null) { + myagg.numDV = getNumDistinctValueEstimator(buf); + } else { + myagg.numDV.mergeEstimators(getNumDistinctValueEstimator(buf)); + } + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + return ((NumericStatsAgg) agg).serializePartial(partialResult); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + return ((NumericStatsAgg) agg).serialize(result); + } + + public abstract class NumericStatsAgg extends AbstractAggregationBuffer { + + public NumDistinctValueEstimator numDV; /* Distinct value estimator */ + + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return (numDV == null) ? + lengthFor(model) : numDV.lengthFor(model); + } + + protected void initNDVEstimator(String func, int numBitVectors) { + numDV = getEmptyNumDistinctValueEstimator(func, numBitVectors); + } + + protected abstract void update(Object p, PrimitiveObjectInspector inputOI); + + protected Object serialize(BytesWritable result) { + if (numDV != null) { + byte[] buf = numDV.serialize(); + result.set(buf, 0, buf.length); + } + return result; + } + + protected Object serializePartial(BytesWritable result) { + if (numDV != null) { + // Serialize numDistinctValue Estimator + byte[] buf = numDV.serialize(); + result.set(buf, 0, buf.length); + } + return result; + } + + public void reset() throws HiveException { + numDV = null; + } + }; + } + + /** + * GenericUDAFLongStatsEvaluator. + * + */ + public static class GenericUDAFLongStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class LongStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + long v = PrimitiveObjectInspectorUtils.getLong(p, inputOI); + numDV.addToEstimator(v); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new LongStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFDoubleStatsEvaluator. + */ + public static class GenericUDAFDoubleStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class DoubleStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI); + numDV.addToEstimator(v); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DoubleStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + public static class GenericUDAFDecimalStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class DecimalStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.lengthOfDecimal() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + HiveDecimal v = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI); + numDV.addToEstimator(v); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DecimalStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFDateStatsEvaluator. + */ + public static class GenericUDAFDateStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class DateStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + // DateWritableV2 is mutable, DateStatsAgg needs its own copy + DateWritableV2 v = new DateWritableV2((DateWritableV2) inputOI.getPrimitiveWritableObject(p)); + numDV.addToEstimator(v.getDays()); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DateStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFTimestampStatsEvaluator. + */ + public static class GenericUDAFTimestampStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class TimestampStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + // TimestampWritableV2 is mutable, TimestampStatsAgg needs its own copy + TimestampWritableV2 v = new TimestampWritableV2((TimestampWritableV2) inputOI.getPrimitiveWritableObject(p)); + numDV.addToEstimator(v.getSeconds()); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new TimestampStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFStringStatsEvaluator. + */ + public static class GenericUDAFStringStatsEvaluator extends GenericUDAFEvaluator { + + private final static int MAX_BIT_VECTORS = 1024; + + /* Object Inspector corresponding to the input parameter. + */ + private transient PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector funcOI; + private transient PrimitiveObjectInspector numVectorsOI; + + /* Object Inspector corresponding to the bitvector + */ + private transient BinaryObjectInspector ndvFieldOI; + + /* Partial aggregation result returned by TerminatePartial. + */ + private transient BytesWritable partialResult; + + /* Output of final result of the aggregation + */ + private transient BytesWritable result; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + + // initialize input + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; + funcOI = (PrimitiveObjectInspector) parameters[1]; + if (parameters.length > 2) { + numVectorsOI = (PrimitiveObjectInspector) parameters[2]; + } + } else { + ndvFieldOI = (BinaryObjectInspector) parameters[0]; + } + + // initialize output + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { + partialResult = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } else { + result = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } + } + + @AggregationType(estimable = true) + public static class StringStatsAgg extends AbstractAggregationBuffer { + public NumDistinctValueEstimator numDV; /* Distinct value estimator */ + public boolean firstItem; + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return (numDV == null) ? + lengthFor(model) : numDV.lengthFor(model); } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + StringStatsAgg result = new StringStatsAgg(); + reset(result); + return result; + } + + public void initNDVEstimator(StringStatsAgg aggBuffer, String func, int numBitVectors) { + aggBuffer.numDV = getEmptyNumDistinctValueEstimator(func, numBitVectors); + aggBuffer.numDV.reset(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + StringStatsAgg myagg = (StringStatsAgg) agg; + myagg.firstItem = true; + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + Object p = parameters[0]; + StringStatsAgg myagg = (StringStatsAgg) agg; + + if (myagg.firstItem) { + int numVectors = 0; + String func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString( + parameters[1], funcOI); + if (parameters.length > 2) { + numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[2], numVectorsOI); + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + " is " + + MAX_BIT_VECTORS + " , but was passed " + numVectors + " bit vectors"); + } + } + + initNDVEstimator(myagg, func, numVectors); + myagg.firstItem = false; + } + + String v = PrimitiveObjectInspectorUtils.getString(p, inputOI); + if (v != null) { + // Add string value to NumDistinctValue Estimator + myagg.numDV.addToEstimator(v); + } + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial != null) { + StringStatsAgg myagg = (StringStatsAgg) agg; + + // Merge numDistinctValue Estimators + byte[] buf = ndvFieldOI.getPrimitiveJavaObject(partial); + + if (buf != null && buf.length != 0) { + if (myagg.numDV == null) { + myagg.numDV = getNumDistinctValueEstimator(buf); + } else { + myagg.numDV.mergeEstimators(getNumDistinctValueEstimator(buf)); + } + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + StringStatsAgg myagg = (StringStatsAgg) agg; + // Serialize numDistinctValue Estimator + if (myagg.numDV != null) { + byte[] buf = myagg.numDV.serialize(); + partialResult.set(buf, 0, buf.length); + } + return partialResult; + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + StringStatsAgg myagg = (StringStatsAgg) agg; + if (myagg.numDV != null) { + byte[] buf = myagg.numDV.serialize(); + result.set(buf, 0, buf.length); + } + return result; + } + } + + @InterfaceAudience.LimitedPrivate(value = { "Hive" }) + static int lengthFor(JavaDataModel model) { + int length = model.object(); + // HiveConf hive.stats.ndv.error default produces 16 + length += model.array() * 3; // three array + length += model.primitive1() * 16 * 2; // two int array + length += (model.object() + model.array() + model.primitive1() + model.primitive2()) + * 16; // bitset array + return length; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 4e7c598155..2b5f90e2c5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -61,6 +61,7 @@ */ @Description(name = "compute_stats", value = "_FUNC_(x) - Returns the statistical summary of a set of primitive type values.") +@Deprecated public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver { static final Logger LOG = LoggerFactory.getLogger(GenericUDAFComputeStats.class.getName()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNDVComputeBitVector.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNDVComputeBitVector.java new file mode 100644 index 0000000000..9f9d8eb044 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNDVComputeBitVector.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; + + +/** + * GenericUDFNDVComputeBitVector. The ndv_compute_bit_vector function can be used on top of + * compute_bit_vector aggregate function to extract an estimate of the ndv from it. + */ +@Description(name = "ndv_compute_bit_vector", + value = "_FUNC_(x) - Extracts NDV from bit vector.") +public class GenericUDFNDVComputeBitVector extends GenericUDF { + + protected transient BinaryObjectInspector inputOI; + protected final LongWritable result = new LongWritable(0); + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments[0].getCategory() != Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, + "ndv_compute_bitvector input only takes primitive types, got " + arguments[0].getTypeName()); + } + PrimitiveObjectInspector objectInspector = (PrimitiveObjectInspector) arguments[0]; + if (objectInspector.getPrimitiveCategory() != PrimitiveCategory.BINARY) { + throw new UDFArgumentTypeException(0, + "ndv_compute_bitvector input only takes BINARY type, got " + arguments[0].getTypeName()); + } + inputOI = (BinaryObjectInspector) arguments[0]; + return PrimitiveObjectInspectorFactory.writableLongObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + if (arguments[0] == null) { + return null; + } + Object input = arguments[0].get(); + if (input == null) { + return null; + } + + byte[] buf = inputOI.getPrimitiveJavaObject(input); + if (buf == null || buf.length == 0) { + return null; + } + NumDistinctValueEstimator numDV = + NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(buf); + result.set(numDV.estimateNumDistinctValues()); + return result; + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString("ndv_compute_bit_vector", children, ","); + } +} diff --git a/ql/src/test/results/clientnegative/fileformat_void_input.q.out b/ql/src/test/results/clientnegative/fileformat_void_input.q.out index 59242ea2f4..b9f404cb9a 100644 --- a/ql/src/test/results/clientnegative/fileformat_void_input.q.out +++ b/ql/src/test/results/clientnegative/fileformat_void_input.q.out @@ -10,4 +10,4 @@ POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -FAILED: SemanticException 1:72 Input format must implement InputFormat. Error encountered near token 'dest1' +FAILED: SemanticException 1:761 Input format must implement InputFormat. Error encountered near token 'dest1' diff --git a/ql/src/test/results/clientnegative/masking_mv.q.out b/ql/src/test/results/clientnegative/masking_mv.q.out index 02ff153254..b832d39a03 100644 --- a/ql/src/test/results/clientnegative/masking_mv.q.out +++ b/ql/src/test/results/clientnegative/masking_mv.q.out @@ -62,29 +62,33 @@ STAGE PLANS: outputColumnNames: col1 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out index 5d09134eb8..b36877c831 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out @@ -188,11 +188,11 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 550 Data size: 2200 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '1' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 550 Data size: 2200 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 1 @@ -205,8 +205,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types string,int,int,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -265,7 +265,7 @@ STAGE PLANS: Map-reduce partition columns: '1' (type: string) Statistics: Num rows: 550 Data size: 2200 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -278,8 +278,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types string,int,int,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -288,8 +288,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types string,int,int,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -298,14 +298,14 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '1' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 275 Data size: 1100 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 275 Data size: 1100 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 @@ -320,8 +320,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out index bbe6b8baed..604d5d5a03 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out @@ -407,11 +407,11 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3223 Data size: 610250 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '2' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 3223 Data size: 610250 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 1 @@ -424,8 +424,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types string,int,int,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -485,7 +485,7 @@ STAGE PLANS: Map-reduce partition columns: '2' (type: string) Statistics: Num rows: 3223 Data size: 610250 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -498,8 +498,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types string,int,int,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -508,8 +508,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types string,int,int,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -518,14 +518,14 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '2' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1611 Data size: 305030 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), '2' (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1611 Data size: 305030 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 @@ -540,8 +540,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out index 5b47efaccd..e47a10f306 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out @@ -662,29 +662,33 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(CASE WHEN (v1 is null) THEN (1) ELSE (null) END), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(CASE WHEN (v2 is null) THEN (1) ELSE (null) END), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out index 83516bf2f5..c3ed681a32 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out @@ -268,11 +268,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -284,8 +284,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -421,9 +421,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -435,8 +435,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -445,8 +445,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -455,34 +455,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n9 select a.key, a.value, b.value @@ -693,11 +697,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -709,8 +713,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -846,9 +850,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -860,8 +864,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -870,8 +874,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -880,34 +884,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n9 select a.key, a.value, b.value diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out index c95804c623..238a841d28 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out @@ -252,11 +252,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -268,8 +268,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -405,9 +405,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -419,8 +419,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -429,8 +429,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -439,34 +439,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n5 select a.key, a.value, b.value @@ -677,11 +681,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -693,8 +697,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -830,9 +834,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -844,8 +848,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -854,8 +858,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -864,34 +868,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n5 select a.key, a.value, b.value diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out index bbff481863..9d024596e9 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out @@ -252,11 +252,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -268,8 +268,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -405,9 +405,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -419,8 +419,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -429,8 +429,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -439,34 +439,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n1 select a.key, a.value, b.value @@ -677,11 +681,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -693,8 +697,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -830,9 +834,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -844,8 +848,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -854,8 +858,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types int,struct,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -864,34 +868,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n1 select a.key, a.value, b.value diff --git a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out index b43c6758ed..999cc02184 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -283,19 +283,19 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 312 Data size: 178025 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -388,34 +388,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -823,19 +827,19 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 163 Data size: 93968 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -928,34 +932,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index bd2e6a8cbf..90461cd459 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -213,19 +213,19 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 146 Data size: 70215 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -272,34 +272,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index cd3576a652..eeada740c4 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -270,19 +270,19 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 163 Data size: 93968 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -329,34 +329,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/join_map_ppr.q.out b/ql/src/test/results/clientpositive/join_map_ppr.q.out index bf57f67346..2ff19571db 100644 --- a/ql/src/test/results/clientpositive/join_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/join_map_ppr.q.out @@ -143,19 +143,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 1100 Data size: 195800 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -204,34 +204,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -726,19 +730,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 1100 Data size: 104500 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -787,34 +791,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index bc9bdc7116..193e7ee251 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -84,12 +84,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -97,9 +97,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -184,30 +184,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index aeeffa5e20..b6c1f7b8e7 100644 --- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1885,7 +1885,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Reducer 5 @@ -2757,7 +2757,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Reducer 5 @@ -2765,7 +2765,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Reducer 6 diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out index 8a05edf911..54159c5dc0 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: a, b, d Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(d), max(d), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), compute_bit_vector(d, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: date), _col9 (type: date), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DATE' (type: string), _col8 (type: date), _col9 (type: date), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -289,33 +293,37 @@ STAGE PLANS: outputColumnNames: a, b, d Statistics: Num rows: 5 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(d), max(d), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), compute_bit_vector(d, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: date), _col9 (type: date), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DATE' (type: string), _col8 (type: date), _col9 (type: date), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out index 5e34ef39cb..cfeab917d8 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out @@ -133,31 +133,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 10 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out index 6d9150eb82..f5c0f3a625 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out @@ -59,37 +59,37 @@ STAGE PLANS: outputColumnNames: a, b, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -285,37 +285,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), max(length(d)), avg(COALESCE(length(d),0)), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), compute_bit_vector(d, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 788 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 788 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -469,37 +469,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), max(length(d)), avg(COALESCE(length(d),0)), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), compute_bit_vector(d, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 788 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 788 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out index e92048ccb3..568939d164 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out @@ -83,12 +83,12 @@ STAGE PLANS: outputColumnNames: a, b, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: int) @@ -96,9 +96,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -144,30 +144,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:int + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:int escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -342,12 +342,12 @@ STAGE PLANS: outputColumnNames: a, b, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: int) @@ -355,9 +355,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -403,30 +403,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:int + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:int escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -539,37 +539,37 @@ STAGE PLANS: outputColumnNames: a, b, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_6.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_6.q.out index b674164063..890c3f7d14 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_6.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_6.q.out @@ -56,19 +56,19 @@ STAGE PLANS: outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: one (type: string), two (type: string), three (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 500 Data size: 422500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 500 Data size: 422500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -82,18 +82,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 500 Data size: 388500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out index 797ccdd911..96e91b547e 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out @@ -106,35 +106,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), c1 (type: int), c2 (type: string) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), key (type: string), c1 (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), CASE WHEN (c2 is null) THEN (1) ELSE (null) END (type: int), c2 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col7), avg(VALUE._col8), count(VALUE._col9), compute_bit_vector(VALUE._col10, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out index 48d1d92078..5d41ff5357 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out @@ -119,12 +119,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -132,9 +132,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Filter Operator isSamplingPred: false @@ -178,12 +178,12 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '2008-12-31' (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: '2008-12-31' (type: string), _col1 (type: string) @@ -191,9 +191,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -358,30 +358,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -396,30 +396,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '2008-12-31' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out index d3542c947f..bf744a2492 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out @@ -104,31 +104,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join1.q.out b/ql/src/test/results/clientpositive/llap/auto_join1.q.out index 8c039780de..b2c0c54254 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join1.q.out @@ -94,33 +94,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join14.q.out b/ql/src/test/results/clientpositive/llap/auto_join14.q.out index 708b0ea9cd..5714ce1e44 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join14.q.out @@ -100,33 +100,37 @@ STAGE PLANS: outputColumnNames: c1, c2 Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join17.q.out b/ql/src/test/results/clientpositive/llap/auto_join17.q.out index e6500b52a5..5b55cc3c58 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join17.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join17.q.out @@ -73,16 +73,16 @@ STAGE PLANS: outputColumnNames: key1, value1, key2, value2 Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -111,17 +111,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join19.q.out b/ql/src/test/results/clientpositive/llap/auto_join19.q.out index 93ab531bda..42ef41383c 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join19.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join19.q.out @@ -85,16 +85,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -123,17 +123,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out index abab9ae0ab..f04f415712 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out @@ -85,16 +85,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -123,17 +123,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join2.q.out b/ql/src/test/results/clientpositive/llap/auto_join2.q.out index 9982478128..ac476f9f4b 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join2.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join2.q.out @@ -83,16 +83,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -143,17 +143,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join26.q.out b/ql/src/test/results/clientpositive/llap/auto_join26.q.out index cbe9af7367..f140b82e68 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join26.q.out @@ -122,31 +122,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join3.q.out b/ql/src/test/results/clientpositive/llap/auto_join3.q.out index 6f0fcad352..49a278cd45 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join3.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join3.q.out @@ -83,16 +83,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -142,17 +142,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join4.q.out b/ql/src/test/results/clientpositive/llap/auto_join4.q.out index 341c892a7b..2f51300c75 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join4.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join4.q.out @@ -95,16 +95,16 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -133,17 +133,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join5.q.out b/ql/src/test/results/clientpositive/llap/auto_join5.q.out index 6419b80bb9..f3b2ef5b71 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join5.q.out @@ -117,33 +117,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join6.q.out b/ql/src/test/results/clientpositive/llap/auto_join6.q.out index aac9321390..b108195921 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join6.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join6.q.out @@ -127,31 +127,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join7.q.out b/ql/src/test/results/clientpositive/llap/auto_join7.q.out index cd3b8c4c9d..0929e22a2a 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join7.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join7.q.out @@ -169,31 +169,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6 Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), max(length(c6)), avg(COALESCE(length(c6),0)), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: struct), _col22 (type: bigint), _col23 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), max(VALUE._col20), avg(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col20,0)) (type: bigint), COALESCE(_col21,0) (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join8.q.out b/ql/src/test/results/clientpositive/llap/auto_join8.q.out index 3f3dcd252c..4a52fab75a 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join8.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join8.q.out @@ -98,16 +98,16 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -136,17 +136,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join9.q.out b/ql/src/test/results/clientpositive/llap/auto_join9.q.out index 06c25ec17d..fae8acff1f 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join9.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join9.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out index 3ae8a54ae8..7f66c6a370 100644 --- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out @@ -1763,16 +1763,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) File Output Operator compressed: false Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE @@ -1786,47 +1786,55 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2055,16 +2063,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 @@ -2088,17 +2096,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2125,31 +2137,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out index 2d89f0fd2c..901da8144c 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out @@ -132,16 +132,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -159,16 +159,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -197,32 +197,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -431,16 +439,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -458,16 +466,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -496,32 +504,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -730,16 +746,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -757,16 +773,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -795,32 +811,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/binary_output_format.q.out b/ql/src/test/results/clientpositive/llap/binary_output_format.q.out index 0b7a75eede..5f8c69c9d5 100644 --- a/ql/src/test/results/clientpositive/llap/binary_output_format.q.out +++ b/ql/src/test/results/clientpositive/llap/binary_output_format.q.out @@ -124,19 +124,19 @@ STAGE PLANS: outputColumnNames: mydata Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(mydata, 'hll') + aggregations: max(length(mydata)), avg(COALESCE(length(mydata),0)), count(CASE WHEN (mydata is null) THEN (1) ELSE (null) END), compute_bit_vector(mydata, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) auto parallelism: false Path -> Alias: hdfs://### HDFS PATH ### [src] @@ -182,34 +182,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 - directory: hdfs://### HDFS PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - Stats Publishing Key Prefix: hdfs://### HDFS PATH ### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0 - columns.types struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 + directory: hdfs://### HDFS PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + Stats Publishing Key Prefix: hdfs://### HDFS PATH ### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket1.q.out b/ql/src/test/results/clientpositive/llap/bucket1.q.out index 706d391ea9..db2ec6ae8a 100644 --- a/ql/src/test/results/clientpositive/llap/bucket1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket1.q.out @@ -136,53 +136,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket2.q.out b/ql/src/test/results/clientpositive/llap/bucket2.q.out index d40bd0107b..93acfaf182 100644 --- a/ql/src/test/results/clientpositive/llap/bucket2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket2.q.out @@ -135,34 +135,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket3.q.out b/ql/src/test/results/clientpositive/llap/bucket3.q.out index 094286123e..41e2ec6727 100644 --- a/ql/src/test/results/clientpositive/llap/bucket3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket3.q.out @@ -139,12 +139,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -152,39 +152,39 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: true Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/bucket4.q.out b/ql/src/test/results/clientpositive/llap/bucket4.q.out index 7246806692..c43f03ce9b 100644 --- a/ql/src/test/results/clientpositive/llap/bucket4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket4.q.out @@ -136,34 +136,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket5.q.out b/ql/src/test/results/clientpositive/llap/bucket5.q.out index 7127f021c4..10bc109a45 100644 --- a/ql/src/test/results/clientpositive/llap/bucket5.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket5.q.out @@ -174,53 +174,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 - directory: hdfs://### HDFS PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Stats Publishing Key Prefix: hdfs://### HDFS PATH ### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 + directory: hdfs://### HDFS PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + Stats Publishing Key Prefix: hdfs://### HDFS PATH ### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 4 Execution mode: llap Needs Tagging: false @@ -261,53 +265,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 5 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 - directory: hdfs://### HDFS PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Stats Publishing Key Prefix: hdfs://### HDFS PATH ### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 + directory: hdfs://### HDFS PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + Stats Publishing Key Prefix: hdfs://### HDFS PATH ### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket6.q.out b/ql/src/test/results/clientpositive/llap/bucket6.q.out index ac9fb00c57..ea8115e3c1 100644 --- a/ql/src/test/results/clientpositive/llap/bucket6.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket6.q.out @@ -77,31 +77,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket_many.q.out b/ql/src/test/results/clientpositive/llap/bucket_many.q.out index 75f3452e17..5416ba5c4e 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_many.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_many.q.out @@ -136,53 +136,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out b/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out index 716d60a822..fba0ce2794 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out @@ -133,34 +133,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out index 018cb75ef3..cbd05d2177 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out @@ -649,53 +649,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1063,53 +1067,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out index bc9afb00eb..5eddbc488a 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out @@ -326,53 +326,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -751,53 +755,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1236,53 +1244,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 139 Data size: 21549 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index c2ec2c208c..7cb86bc7d9 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -350,53 +350,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -775,53 +779,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out index b1976d2d4e..fe07ea4413 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out @@ -334,53 +334,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -731,53 +735,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out index 111d1f8b0f..18a1cbf1df 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out @@ -431,35 +431,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out index a4e3ca716a..0e704a92b2 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out @@ -179,35 +179,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -405,35 +405,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 29 Data size: 7917 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9655172 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -655,35 +655,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 29 Data size: 7917 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9655172 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -909,35 +909,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1145,35 +1145,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1381,35 +1381,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out index b635dde797..120bb97427 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out @@ -228,35 +228,35 @@ STAGE PLANS: outputColumnNames: value, key, ds Statistics: Num rows: 500 Data size: 136500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out index 77f994fede..cc18579844 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out @@ -168,35 +168,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -420,35 +420,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out index ca137dc2cd..9efa1ff4b9 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out @@ -168,35 +168,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -411,35 +411,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out index 77c8898adb..ce6cc49bf7 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out @@ -170,35 +170,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -414,35 +414,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -658,35 +658,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -837,35 +837,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1022,35 +1022,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1284,35 +1284,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1556,35 +1556,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out index b5968b2d03..fec8114152 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out @@ -157,35 +157,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 3 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -385,35 +385,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 3 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -619,35 +619,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 3 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out index 11d17af1e0..99e734ac86 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out @@ -168,35 +168,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -406,35 +406,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 637 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 569 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out b/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out index 27f1c15a54..deae4b3756 100644 --- a/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out +++ b/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3544 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3072 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3544 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 3072 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3072 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 3072 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3072 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/cast1.q.out b/ql/src/test/results/clientpositive/llap/cast1.q.out index 1e2217c75b..a9c87c2586 100644 --- a/ql/src/test/results/clientpositive/llap/cast1.q.out +++ b/ql/src/test/results/clientpositive/llap/cast1.q.out @@ -56,33 +56,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7 Statistics: Num rows: 250 Data size: 31000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), max(length(c6)), avg(COALESCE(length(c6),0)), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(CASE WHEN (c7 is null) THEN (1) ELSE (null) END), compute_bit_vector(c7, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27 + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: struct), _col22 (type: bigint), _col23 (type: binary), _col24 (type: int), _col25 (type: int), _col26 (type: bigint), _col27 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), max(VALUE._col20), avg(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23), min(VALUE._col24), max(VALUE._col25), count(VALUE._col26), compute_bit_vector(VALUE._col27) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27 + Statistics: Num rows: 1 Data size: 1148 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col20,0)) (type: bigint), COALESCE(_col21,0) (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'LONG' (type: string), UDFToLong(_col24) (type: bigint), UDFToLong(_col25) (type: bigint), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out index d28a10406d..d60099f66c 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out @@ -73,16 +73,16 @@ STAGE PLANS: outputColumnNames: key1, value1, key2, value2 Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -111,17 +111,17 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out index 462186a104..a785100953 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out @@ -88,31 +88,31 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -258,31 +258,31 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out index f134aee107..df6c38d050 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -96,17 +96,17 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(CASE WHEN (c7 is null) THEN (1) ELSE (null) END), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(CASE WHEN (c8 is null) THEN (1) ELSE (null) END), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(CASE WHEN (c9 is null) THEN (1) ELSE (null) END), compute_bit_vector(c9, 'hll'), min(c10), max(c10), count(CASE WHEN (c10 is null) THEN (1) ELSE (null) END), compute_bit_vector(c10, 'hll'), min(c11), max(c11), count(CASE WHEN (c11 is null) THEN (1) ELSE (null) END), compute_bit_vector(c11, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43 + Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'DOUBLE' (type: string), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'DOUBLE' (type: string), _col28 (type: double), _col29 (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'DOUBLE' (type: string), _col32 (type: double), _col33 (type: double), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary), 'DOUBLE' (type: string), _col36 (type: double), _col37 (type: double), _col38 (type: bigint), COALESCE(ndv_compute_bit_vector(_col39),0) (type: bigint), _col39 (type: binary), 'DOUBLE' (type: string), _col40 (type: double), _col41 (type: double), _col42 (type: bigint), COALESCE(ndv_compute_bit_vector(_col43),0) (type: bigint), _col43 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65 + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/clusterctas.q.out b/ql/src/test/results/clientpositive/llap/clusterctas.q.out index 40ceee215f..e3492c0947 100644 --- a/ql/src/test/results/clientpositive/llap/clusterctas.q.out +++ b/ql/src/test/results/clientpositive/llap/clusterctas.q.out @@ -67,31 +67,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out b/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out index bea9396a5a..b08137d882 100644 --- a/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out +++ b/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out @@ -90,22 +90,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out index fb4480863a..a3f8f5d1fb 100644 --- a/ql/src/test/results/clientpositive/llap/column_table_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out @@ -86,19 +86,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -144,34 +144,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -348,12 +352,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 196 Data size: 257552 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 196 Data size: 313792 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 196 Data size: 232256 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -361,9 +365,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 196 Data size: 313792 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 196 Data size: 232256 Basic stats: PARTIAL Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -452,30 +456,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 98 Data size: 114656 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 98 Data size: 114656 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 98 Data size: 114656 Basic stats: PARTIAL Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -755,12 +759,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 196 Data size: 257552 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 196 Data size: 313792 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 196 Data size: 232256 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -768,9 +772,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 196 Data size: 313792 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 196 Data size: 232256 Basic stats: PARTIAL Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -859,30 +863,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 98 Data size: 114656 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 98 Data size: 114656 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 98 Data size: 114656 Basic stats: PARTIAL Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -1159,12 +1163,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 117 Data size: 121232 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), '11' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 117 Data size: 166072 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 117 Data size: 117400 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), '11' (type: string) @@ -1172,9 +1176,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), '11' (type: string) - Statistics: Num rows: 117 Data size: 166072 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 117 Data size: 117400 Basic stats: PARTIAL Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -1225,30 +1229,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), '11' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 58 Data size: 81584 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 58 Data size: 57456 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 58 Data size: 81584 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 58 Data size: 57456 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 58 Data size: 81584 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 58 Data size: 57456 Basic stats: PARTIAL Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out index dc80b7ccad..1c7089318c 100644 --- a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out +++ b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out @@ -89,19 +89,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: all inputs @@ -147,34 +147,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -349,12 +353,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -362,9 +366,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: all inputs @@ -447,30 +451,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 900 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 900 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -745,12 +749,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 1 Data size: 354 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), '11' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 734 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), '11' (type: string) @@ -758,9 +762,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), '11' (type: string) - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 734 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: all inputs @@ -808,30 +812,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), '11' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 598 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 802 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 802 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out index 8152a824f1..70c4bb1302 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out @@ -69,11 +69,11 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll') keys: 2000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: 2000.0D (type: double) @@ -81,21 +81,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 2000.0D (type: double) Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: 2000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), 2000.0D (type: double) - outputColumnNames: _col0, _col1 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 2000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -154,11 +154,11 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll') keys: 2000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 @@ -169,7 +169,7 @@ STAGE PLANS: Map-reduce partition columns: 2000.0D (type: double) Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -221,14 +221,14 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: 2000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), 2000.0D (type: double) - outputColumnNames: _col0, _col1 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 2000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 2 @@ -243,8 +243,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1 - columns.types struct:double + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:bigint:bigint:bigint:bigint:binary:double escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -356,11 +356,11 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll') keys: 4000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: 4000.0D (type: double) @@ -368,21 +368,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 4000.0D (type: double) Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: 4000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), 4000.0D (type: double) - outputColumnNames: _col0, _col1 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 4000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -441,11 +441,11 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll') keys: 4000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 @@ -456,7 +456,7 @@ STAGE PLANS: Map-reduce partition columns: 4000.0D (type: double) Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -508,14 +508,14 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: 4000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), 4000.0D (type: double) - outputColumnNames: _col0, _col1 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 4000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 2 @@ -530,8 +530,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1 - columns.types struct:double + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:bigint:bigint:bigint:bigint:binary:double escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -605,37 +605,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll'), max(length(employeename)), avg(COALESCE(length(employeename),0)), count(CASE WHEN (employeename is null) THEN (1) ELSE (null) END), compute_bit_vector(employeename, 'hll') keys: 2000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: 2000.0D (type: double) null sort order: z sort order: + Map-reduce partition columns: 2000.0D (type: double) - Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: 2000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), 2000.0D (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 2000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 538 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 538 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -741,37 +741,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename, employeesalary Statistics: Num rows: 26 Data size: 2596 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll'), max(length(employeename)), avg(COALESCE(length(employeename),0)), count(CASE WHEN (employeename is null) THEN (1) ELSE (null) END), compute_bit_vector(employeename, 'hll') keys: employeesalary (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1744 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 2 Data size: 1744 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 2 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 664 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -881,33 +881,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename Statistics: Num rows: 26 Data size: 2300 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll'), max(length(employeename)), avg(COALESCE(length(employeename),0)), count(CASE WHEN (employeename is null) THEN (1) ELSE (null) END), compute_bit_vector(employeename, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out index 9ddb93654e..63ab0dc22f 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out @@ -105,37 +105,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename, country Statistics: Num rows: 1 Data size: 1012 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeename, 'hll'), compute_stats(employeeid, 'hll') + aggregations: max(length(employeename)), avg(COALESCE(length(employeename),0)), count(CASE WHEN (employeename is null) THEN (1) ELSE (null) END), compute_bit_vector(employeename, 'hll'), min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll') keys: 4000.0D (type: double), country (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1236 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 764 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: 4000.0D (type: double), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: 4000.0D (type: double), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1236 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 764 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: 4000.0D (type: double), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 764 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), 4000.0D (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 4000.0D (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 764 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 764 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -263,37 +263,37 @@ STAGE PLANS: outputColumnNames: employeeid, country Statistics: Num rows: 3 Data size: 2254 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll') keys: 2000.0D (type: double), country (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 1836 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 3 Data size: 1044 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: 2000.0D (type: double), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: 2000.0D (type: double), _col1 (type: string) - Statistics: Num rows: 3 Data size: 1836 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct) + Statistics: Num rows: 3 Data size: 1044 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: 2000.0D (type: double), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 348 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), 2000.0D (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 2000.0D (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 348 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 348 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -419,37 +419,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeesalary, country Statistics: Num rows: 31 Data size: 6072 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll') keys: employeesalary (type: double), country (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 31 Data size: 19216 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 31 Data size: 11032 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 31 Data size: 19216 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct) + Statistics: Num rows: 31 Data size: 11032 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 15 Data size: 5336 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col0 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 15 Data size: 5336 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 15 Data size: 5336 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -573,37 +573,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename, employeesalary, country Statistics: Num rows: 54 Data size: 15386 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + aggregations: min(employeeid), max(employeeid), count(CASE WHEN (employeeid is null) THEN (1) ELSE (null) END), compute_bit_vector(employeeid, 'hll'), max(length(employeename)), avg(COALESCE(length(employeename),0)), count(CASE WHEN (employeename is null) THEN (1) ELSE (null) END), compute_bit_vector(employeename, 'hll') keys: employeesalary (type: double), country (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 8448 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 8 Data size: 4672 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 8 Data size: 8448 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 8 Data size: 4672 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 8 Data size: 4128 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 8 Data size: 5776 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 5776 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out index 8ac436aeb7..a835c832e2 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out @@ -42,33 +42,37 @@ STAGE PLANS: outputColumnNames: user id, user name Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(user id, 'hll'), compute_stats(user name, 'hll') + aggregations: min(user id), max(user id), count(CASE WHEN (user id is null) THEN (1) ELSE (null) END), compute_bit_vector(user id, 'hll'), max(length(user name)), avg(COALESCE(length(user name),0)), count(CASE WHEN (user name is null) THEN (1) ELSE (null) END), compute_bit_vector(user name, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -120,33 +124,37 @@ STAGE PLANS: outputColumnNames: user id Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(user id, 'hll') + aggregations: min(user id), max(user id), count(CASE WHEN (user id is null) THEN (1) ELSE (null) END), compute_bit_vector(user id, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out index f361fda252..06afe28ee4 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out @@ -72,33 +72,37 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: float), _col9 (type: float), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col8) (type: double), UDFToDouble(_col9) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -144,19 +148,19 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: float), _col9 (type: float), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -204,34 +208,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col8) (type: double), UDFToDouble(_col9) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:double:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -287,33 +295,37 @@ STAGE PLANS: outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), max(length(desturl)), avg(COALESCE(length(desturl),0)), count(CASE WHEN (desturl is null) THEN (1) ELSE (null) END), compute_bit_vector(desturl, 'hll'), max(length(visitdate)), avg(COALESCE(length(visitdate),0)), count(CASE WHEN (visitdate is null) THEN (1) ELSE (null) END), compute_bit_vector(visitdate, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll'), max(length(useragent)), avg(COALESCE(length(useragent),0)), count(CASE WHEN (useragent is null) THEN (1) ELSE (null) END), compute_bit_vector(useragent, 'hll'), max(length(ccode)), avg(COALESCE(length(ccode),0)), count(CASE WHEN (ccode is null) THEN (1) ELSE (null) END), compute_bit_vector(ccode, 'hll'), max(length(lcode)), avg(COALESCE(length(lcode),0)), count(CASE WHEN (lcode is null) THEN (1) ELSE (null) END), compute_bit_vector(lcode, 'hll'), max(length(skeyword)), avg(COALESCE(length(skeyword),0)), count(CASE WHEN (skeyword is null) THEN (1) ELSE (null) END), compute_bit_vector(skeyword, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: float), _col13 (type: float), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: struct), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: struct), _col22 (type: bigint), _col23 (type: binary), _col24 (type: int), _col25 (type: struct), _col26 (type: bigint), _col27 (type: binary), _col28 (type: int), _col29 (type: struct), _col30 (type: bigint), _col31 (type: binary), _col32 (type: int), _col33 (type: int), _col34 (type: bigint), _col35 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), max(VALUE._col16), avg(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), max(VALUE._col20), avg(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23), max(VALUE._col24), avg(VALUE._col25), count(VALUE._col26), compute_bit_vector(VALUE._col27), max(VALUE._col28), avg(VALUE._col29), count(VALUE._col30), compute_bit_vector(VALUE._col31), min(VALUE._col32), max(VALUE._col33), count(VALUE._col34), compute_bit_vector(VALUE._col35) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col12) (type: double), UDFToDouble(_col13) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col16,0)) (type: bigint), COALESCE(_col17,0) (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col20,0)) (type: bigint), COALESCE(_col21,0) (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col24,0)) (type: bigint), COALESCE(_col25,0) (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col28,0)) (type: bigint), COALESCE(_col29,0) (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'LONG' (type: string), UDFToLong(_col32) (type: bigint), UDFToLong(_col33) (type: bigint), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2392 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2392 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -444,33 +456,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, e Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), count(CASE WHEN (d is true) THEN (1) ELSE (null) END), count(CASE WHEN (d is false) THEN (1) ELSE (null) END), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), max(length(e)), avg(COALESCE(length(e),0)), count(CASE WHEN (e is null) THEN (1) ELSE (null) END) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: int), _col16 (type: struct), _col17 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), count(VALUE._col12), count(VALUE._col13), count(VALUE._col14), max(VALUE._col15), avg(VALUE._col16), count(VALUE._col17) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'BOOLEAN' (type: string), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), 'BINARY' (type: string), UDFToLong(COALESCE(_col15,0)) (type: bigint), COALESCE(_col16,0) (type: double), _col17 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -607,33 +623,37 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: float), _col9 (type: float), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col8) (type: double), UDFToDouble(_col9) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -679,19 +699,19 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: float), _col9 (type: float), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -739,34 +759,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col8) (type: double), UDFToDouble(_col9) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:double:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -822,33 +846,37 @@ STAGE PLANS: outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), max(length(desturl)), avg(COALESCE(length(desturl),0)), count(CASE WHEN (desturl is null) THEN (1) ELSE (null) END), compute_bit_vector(desturl, 'hll'), max(length(visitdate)), avg(COALESCE(length(visitdate),0)), count(CASE WHEN (visitdate is null) THEN (1) ELSE (null) END), compute_bit_vector(visitdate, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll'), max(length(useragent)), avg(COALESCE(length(useragent),0)), count(CASE WHEN (useragent is null) THEN (1) ELSE (null) END), compute_bit_vector(useragent, 'hll'), max(length(ccode)), avg(COALESCE(length(ccode),0)), count(CASE WHEN (ccode is null) THEN (1) ELSE (null) END), compute_bit_vector(ccode, 'hll'), max(length(lcode)), avg(COALESCE(length(lcode),0)), count(CASE WHEN (lcode is null) THEN (1) ELSE (null) END), compute_bit_vector(lcode, 'hll'), max(length(skeyword)), avg(COALESCE(length(skeyword),0)), count(CASE WHEN (skeyword is null) THEN (1) ELSE (null) END), compute_bit_vector(skeyword, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: float), _col13 (type: float), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: struct), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: struct), _col22 (type: bigint), _col23 (type: binary), _col24 (type: int), _col25 (type: struct), _col26 (type: bigint), _col27 (type: binary), _col28 (type: int), _col29 (type: struct), _col30 (type: bigint), _col31 (type: binary), _col32 (type: int), _col33 (type: int), _col34 (type: bigint), _col35 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), max(VALUE._col16), avg(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), max(VALUE._col20), avg(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23), max(VALUE._col24), avg(VALUE._col25), count(VALUE._col26), compute_bit_vector(VALUE._col27), max(VALUE._col28), avg(VALUE._col29), count(VALUE._col30), compute_bit_vector(VALUE._col31), min(VALUE._col32), max(VALUE._col33), count(VALUE._col34), compute_bit_vector(VALUE._col35) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col12) (type: double), UDFToDouble(_col13) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col16,0)) (type: bigint), COALESCE(_col17,0) (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col20,0)) (type: bigint), COALESCE(_col21,0) (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col24,0)) (type: bigint), COALESCE(_col25,0) (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col28,0)) (type: bigint), COALESCE(_col29,0) (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'LONG' (type: string), UDFToLong(_col32) (type: bigint), UDFToLong(_col33) (type: bigint), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2392 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2392 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out b/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out index ab5cdf0cc8..44da534441 100644 --- a/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out +++ b/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out @@ -78,33 +78,37 @@ STAGE PLANS: outputColumnNames: fl_date Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(fl_date, 'hll') + aggregations: min(fl_date), max(fl_date), count(CASE WHEN (fl_date is null) THEN (1) ELSE (null) END), compute_bit_vector(fl_date, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'DATE' (type: string), _col0 (type: date), _col1 (type: date), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out b/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out index aaf7d89d86..1f8f4ebe3e 100644 --- a/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out +++ b/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out @@ -64,37 +64,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '2008-04-08' (type: string), '11' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: '2008-04-08' (type: string), '11' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '11' (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '2008-04-08' (type: string), '11' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/constprog_dp.q.out b/ql/src/test/results/clientpositive/llap/constprog_dp.q.out index 1eb1001fcb..7ad583b853 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_dp.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_dp.q.out @@ -58,37 +58,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 422 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 626 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 626 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/constprog_type.q.out b/ql/src/test/results/clientpositive/llap/constprog_type.q.out index 59439a4133..ef94d406a8 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_type.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_type.q.out @@ -57,33 +57,37 @@ STAGE PLANS: outputColumnNames: d, t Statistics: Num rows: 500 Data size: 48000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(d, 'hll'), compute_stats(t, 'hll') + aggregations: min(d), max(d), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), compute_bit_vector(d, 'hll'), min(t), max(t), count(CASE WHEN (t is null) THEN (1) ELSE (null) END), compute_bit_vector(t, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: binary), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DATE' (type: string), _col0 (type: date), _col1 (type: date), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'TIMESTAMP' (type: string), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 693 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 693 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/cp_sel.q.out b/ql/src/test/results/clientpositive/llap/cp_sel.q.out index 4fb741011b..a3d430ca90 100644 --- a/ql/src/test/results/clientpositive/llap/cp_sel.q.out +++ b/ql/src/test/results/clientpositive/llap/cp_sel.q.out @@ -165,35 +165,35 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 642 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 642 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 506 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/ctas.q.out b/ql/src/test/results/clientpositive/llap/ctas.q.out index 275bebd3b9..b670e39c6f 100644 --- a/ql/src/test/results/clientpositive/llap/ctas.q.out +++ b/ql/src/test/results/clientpositive/llap/ctas.q.out @@ -109,31 +109,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -320,31 +324,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -531,31 +539,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -806,31 +818,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1019,31 +1035,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/ctas_colname.q.out b/ql/src/test/results/clientpositive/llap/ctas_colname.q.out index f897f06669..3a6b2195c8 100644 --- a/ql/src/test/results/clientpositive/llap/ctas_colname.q.out +++ b/ql/src/test/results/clientpositive/llap/ctas_colname.q.out @@ -69,31 +69,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4 Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll'), max(length(col4)), avg(COALESCE(length(col4),0)), count(CASE WHEN (col4 is null) THEN (1) ELSE (null) END), compute_bit_vector(col4, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -285,31 +289,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -524,31 +532,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -701,33 +713,37 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -911,31 +927,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1403,31 +1423,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1598,31 +1622,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out b/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out index dc9096ee00..e8367834d6 100644 --- a/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out +++ b/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out @@ -61,33 +61,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out index 946f0c4128..15315046ae 100644 --- a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out +++ b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out @@ -135,34 +135,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out index b5fbb21d6e..0f28668c5f 100644 --- a/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out @@ -97,33 +97,37 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: float), _col9 (type: float), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col8) (type: double), UDFToDouble(_col9) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -169,19 +173,19 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: float), _col9 (type: float), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -229,34 +233,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col8) (type: double), UDFToDouble(_col9) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:double:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -408,33 +416,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, e Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), count(CASE WHEN (d is true) THEN (1) ELSE (null) END), count(CASE WHEN (d is false) THEN (1) ELSE (null) END), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), max(length(e)), avg(COALESCE(length(e),0)), count(CASE WHEN (e is null) THEN (1) ELSE (null) END) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: int), _col16 (type: struct), _col17 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), count(VALUE._col12), count(VALUE._col13), count(VALUE._col14), max(VALUE._col15), avg(VALUE._col16), count(VALUE._col17) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'BOOLEAN' (type: string), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), 'BINARY' (type: string), UDFToLong(COALESCE(_col15,0)) (type: bigint), COALESCE(_col16,0) (type: double), _col17 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 07f0fcdc90..54fda565a7 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -116,31 +116,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index 0b5808269f..3309a4b879 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -232,31 +232,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -507,31 +511,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -805,31 +813,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out b/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out index f571961a97..4f3da85f01 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out @@ -60,37 +60,37 @@ STAGE PLANS: outputColumnNames: i, static_part, dyn_part Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll') keys: static_part (type: int), dyn_part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out index d9dad5c879..f3ed49d7d4 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out @@ -207,19 +207,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint), _col0 (type: smallint) null sort order: aa @@ -231,18 +231,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -349,19 +349,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -373,18 +373,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -717,19 +717,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint), _col0 (type: smallint) null sort order: aa @@ -741,18 +741,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -859,19 +859,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -883,18 +883,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1620,35 +1620,35 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1734,19 +1734,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint), _col1 (type: int) null sort order: aa @@ -1758,18 +1758,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1883,19 +1883,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 1787 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 1 Data size: 1787 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -1907,18 +1907,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2033,35 +2033,35 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 5 Data size: 555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.6 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 2 Data size: 1478 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 2 Data size: 1478 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 2 Data size: 1478 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 2 Data size: 2298 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 2298 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2153,19 +2153,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 5 Data size: 555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.6 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 2 Data size: 1478 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 2 Data size: 1478 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Select Operator expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -2182,18 +2182,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 2 Data size: 1478 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 2 Data size: 2298 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 2298 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2591,35 +2591,35 @@ STAGE PLANS: outputColumnNames: si, i, b, f, t Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 8500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 5 Data size: 3260 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 5 Data size: 8500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 5 Data size: 3260 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: smallint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary), _col13 (type: float), _col14 (type: float), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 8820 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 5 Data size: 3260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 8820 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col13) (type: double), UDFToDouble(_col14) (type: double), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 5 Data size: 5310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 8820 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5310 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index 77933b3079..d4b1a0b194 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -148,11 +148,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -160,7 +160,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -174,14 +174,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -292,11 +292,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -304,7 +304,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -316,14 +316,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -644,11 +644,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -656,7 +656,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -670,14 +670,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -788,11 +788,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -800,7 +800,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -812,14 +812,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1549,11 +1549,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -1561,19 +1561,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1663,11 +1663,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -1675,7 +1675,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint), _col1 (type: int) null sort order: aa @@ -1687,14 +1687,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1812,11 +1812,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -1824,7 +1824,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -1836,14 +1836,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1962,11 +1962,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -1974,19 +1974,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2082,11 +2082,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -2094,7 +2094,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Select Operator expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -2111,14 +2111,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2520,11 +2520,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) @@ -2532,19 +2532,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: smallint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary), _col13 (type: float), _col14 (type: float), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col13) (type: double), UDFToDouble(_col14) (type: double), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2993,11 +2993,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3005,7 +3005,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: binary), _col11 (type: float), _col12 (type: float), _col13 (type: bigint), _col14 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3019,14 +3019,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col11) (type: double), UDFToDouble(_col12) (type: double), _col13 (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3116,11 +3116,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3128,7 +3128,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: binary), _col11 (type: float), _col12 (type: float), _col13 (type: bigint), _col14 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3142,14 +3142,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col11) (type: double), UDFToDouble(_col12) (type: double), _col13 (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3239,11 +3239,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3251,7 +3251,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: binary), _col11 (type: float), _col12 (type: float), _col13 (type: bigint), _col14 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3265,14 +3265,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col11) (type: double), UDFToDouble(_col12) (type: double), _col13 (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3362,11 +3362,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3374,7 +3374,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: binary), _col11 (type: float), _col12 (type: float), _col13 (type: bigint), _col14 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3388,14 +3388,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col11) (type: double), UDFToDouble(_col12) (type: double), _col13 (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3485,11 +3485,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3497,7 +3497,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: binary), _col11 (type: float), _col12 (type: float), _col13 (type: bigint), _col14 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3511,14 +3511,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col11) (type: double), UDFToDouble(_col12) (type: double), _col13 (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3608,11 +3608,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3620,7 +3620,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: binary), _col11 (type: float), _col12 (type: float), _col13 (type: bigint), _col14 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3634,14 +3634,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col11) (type: double), UDFToDouble(_col12) (type: double), _col13 (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3738,11 +3738,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3750,21 +3750,21 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: binary), _col11 (type: float), _col12 (type: float), _col13 (type: bigint), _col14 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col11) (type: double), UDFToDouble(_col12) (type: double), _col13 (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -4231,19 +4231,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 804 Data size: 89236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.840796 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 129 Data size: 95331 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 129 Data size: 95331 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -4257,18 +4257,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 128 Data size: 94592 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 128 Data size: 147072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 147072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4375,19 +4375,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -4399,18 +4399,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4502,37 +4502,37 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 804 Data size: 89236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.840796 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 129 Data size: 95331 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 129 Data size: 95331 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 128 Data size: 94592 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 128 Data size: 147072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 147072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4624,19 +4624,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(CASE WHEN (si is null) THEN (1) ELSE (null) END), compute_bit_vector(si, 'hll'), min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(f), max(f), count(CASE WHEN (f is null) THEN (1) ELSE (null) END), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -4648,18 +4648,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3695 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4789,19 +4789,19 @@ STAGE PLANS: outputColumnNames: i, s Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll') keys: s (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 262 Data size: 67334 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 262 Data size: 67334 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -4818,19 +4818,19 @@ STAGE PLANS: outputColumnNames: i, s Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll') keys: s (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 262 Data size: 67334 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 262 Data size: 67334 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -4844,18 +4844,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 262 Data size: 67334 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 262 Data size: 94582 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 94582 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4879,18 +4879,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 262 Data size: 67334 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 262 Data size: 94582 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 94582 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out index 1fa5c46b4f..1725ea7418 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out @@ -129,14 +129,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(CASE WHEN (ss_net_paid_inc_tax is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(CASE WHEN (ss_net_profit is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col5) (type: double), UDFToDouble(_col6) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -397,14 +397,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(CASE WHEN (ss_net_paid_inc_tax is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(CASE WHEN (ss_net_profit is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col5) (type: double), UDFToDouble(_col6) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -679,14 +679,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(CASE WHEN (ss_net_paid_inc_tax is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(CASE WHEN (ss_net_profit is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col5) (type: double), UDFToDouble(_col6) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -943,14 +943,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(CASE WHEN (ss_net_paid_inc_tax is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(CASE WHEN (ss_net_profit is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col5) (type: double), UDFToDouble(_col6) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1268,18 +1268,18 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(CASE WHEN (ss_net_paid_inc_tax is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(CASE WHEN (ss_net_profit is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col5) (type: double), UDFToDouble(_col6) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1532,18 +1532,18 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(CASE WHEN (ss_net_paid_inc_tax is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(CASE WHEN (ss_net_profit is null) THEN (1) ELSE (null) END), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col5) (type: double), UDFToDouble(_col6) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1838,35 +1838,35 @@ STAGE PLANS: outputColumnNames: k1, k2, day Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll') keys: day (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 407 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 407 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 407 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2017,35 +2017,35 @@ STAGE PLANS: outputColumnNames: k1, k2, day Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll') keys: day (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 407 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 407 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 407 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index d4d3c21572..6a5b358643 100644 --- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -61,33 +61,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -185,33 +189,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -292,33 +300,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -909,33 +921,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1524,33 +1540,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 250 Data size: 66250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2174,16 +2194,16 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20) and enforce_constraint(value is not null)) (type: boolean) Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE @@ -2204,48 +2224,56 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: max(length(i)), avg(COALESCE(length(i),0)), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), max(length(j)), avg(COALESCE(length(j),0)), count(CASE WHEN (j is null) THEN (1) ELSE (null) END), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.962963 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2362,16 +2390,16 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20) and enforce_constraint(value is not null)) (type: boolean) Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE @@ -2392,48 +2420,56 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: max(length(i)), avg(COALESCE(length(i),0)), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), max(length(j)), avg(COALESCE(length(j),0)), count(CASE WHEN (j is null) THEN (1) ELSE (null) END), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.962963 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2585,31 +2621,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(de), max(de), count(CASE WHEN (de is null) THEN (1) ELSE (null) END), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(CASE WHEN (vc is null) THEN (1) ELSE (null) END), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2729,31 +2769,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 250 Data size: 82000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(de), max(de), count(CASE WHEN (de is null) THEN (1) ELSE (null) END), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(CASE WHEN (vc is null) THEN (1) ELSE (null) END), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2877,31 +2921,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(de), max(de), count(CASE WHEN (de is null) THEN (1) ELSE (null) END), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(CASE WHEN (vc is null) THEN (1) ELSE (null) END), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3020,31 +3068,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(de), max(de), count(CASE WHEN (de is null) THEN (1) ELSE (null) END), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(CASE WHEN (vc is null) THEN (1) ELSE (null) END), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3189,31 +3241,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(de), max(de), count(CASE WHEN (de is null) THEN (1) ELSE (null) END), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(CASE WHEN (vc is null) THEN (1) ELSE (null) END), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3308,31 +3364,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 250 Data size: 82000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(de), max(de), count(CASE WHEN (de is null) THEN (1) ELSE (null) END), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(CASE WHEN (vc is null) THEN (1) ELSE (null) END), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3626,16 +3686,16 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: max(length(i)), avg(COALESCE(length(i),0)), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), max(length(j)), avg(COALESCE(length(j),0)), count(CASE WHEN (j is null) THEN (1) ELSE (null) END), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.962963 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3659,46 +3719,54 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 83 Data size: 27224 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(de), max(de), count(CASE WHEN (de is null) THEN (1) ELSE (null) END), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(CASE WHEN (vc is null) THEN (1) ELSE (null) END), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(5,2)), _col5 (type: decimal(5,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -3805,37 +3873,37 @@ STAGE PLANS: outputColumnNames: a, b, c, p1, p2 Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') keys: p1 (type: string), p2 (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 891 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 891 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3944,35 +4012,35 @@ STAGE PLANS: outputColumnNames: a, b, c, p1, p2 Statistics: Num rows: 5 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') keys: p1 (type: string), p2 (type: int) minReductionHashAggr: 0.6 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1574 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 2 Data size: 1574 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1166 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 2 Data size: 1778 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1778 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4147,19 +4215,19 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 250 Data size: 91500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') keys: 'yesterday' (type: string), 3 (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 793 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 'yesterday' (type: string), 3 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: 'yesterday' (type: string), 3 (type: int) - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 793 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20) and enforce_constraint(value is not null)) (type: boolean) Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE @@ -4180,34 +4248,34 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: max(length(i)), avg(COALESCE(length(i),0)), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), max(length(j)), avg(COALESCE(length(j),0)), count(CASE WHEN (j is null) THEN (1) ELSE (null) END), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.962963 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: 'yesterday' (type: string), 3 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 589 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), 'yesterday' (type: string), 3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'yesterday' (type: string), 3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 895 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 895 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4216,17 +4284,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -4488,31 +4560,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(CASE WHEN (a1 is null) THEN (1) ELSE (null) END), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -4788,31 +4864,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(CASE WHEN (a1 is null) THEN (1) ELSE (null) END), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -5105,31 +5185,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(CASE WHEN (a1 is null) THEN (1) ELSE (null) END), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -5370,31 +5454,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(CASE WHEN (a1 is null) THEN (1) ELSE (null) END), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -5765,33 +5853,37 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(j), max(j), count(CASE WHEN (j is null) THEN (1) ELSE (null) END), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -5870,33 +5962,37 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(j), max(j), count(CASE WHEN (j is null) THEN (1) ELSE (null) END), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -5990,33 +6086,37 @@ STAGE PLANS: outputColumnNames: i Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -6086,33 +6186,37 @@ STAGE PLANS: outputColumnNames: i Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -6229,35 +6333,35 @@ STAGE PLANS: outputColumnNames: key, key_mm Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: key_mm (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out b/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out index 7756c05c2d..0efb345c52 100644 --- a/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out @@ -42,33 +42,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/explain_ddl.q.out b/ql/src/test/results/clientpositive/llap/explain_ddl.q.out index 20a7ee846f..4e01e546c7 100644 --- a/ql/src/test/results/clientpositive/llap/explain_ddl.q.out +++ b/ql/src/test/results/clientpositive/llap/explain_ddl.q.out @@ -89,33 +89,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -189,33 +193,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -293,33 +301,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -393,33 +405,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -580,33 +596,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out b/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out index 4f88488642..49f14302df 100644 --- a/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out @@ -1213,241 +1213,247 @@ Stage-5 Stage-3 Reducer 5 llap File Output Operator [FS_81] - Group By Operator [GBY_79] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Union 4 [CUSTOM_SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] llap - File Output Operator [FS_233] - table:{"name:":"default.a_n14"} - Select Operator [SEL_231] (rows=193/820 width=175) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_230] (rows=193/820 width=175) - Conds:RS_69._col1=Union 23._col0(Inner),Output:["_col0","_col3"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_69] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_202] (rows=39/115 width=264) - Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_66] + Select Operator [SEL_80] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_79] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Union 4 [CUSTOM_SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] llap + File Output Operator [FS_233] + table:{"name:":"default.a_n14"} + Select Operator [SEL_231] (rows=193/820 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_230] (rows=193/820 width=175) + Conds:RS_69._col1=Union 23._col0(Inner),Output:["_col0","_col3"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_69] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_202] (rows=39/115 width=264) + Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_66] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_120] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 21 [SIMPLE_EDGE] llap + SHUFFLE [RS_67] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_129] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_49] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] llap + Reduce Output Operator [RS_271] PartitionCols:_col0 - Select Operator [SEL_5] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_120] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 21 [SIMPLE_EDGE] llap - SHUFFLE [RS_67] + Select Operator [SEL_269] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_268] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_267] (rows=25/25 width=89) + Output:["value"] + <-Map 24 [CONTAINS] llap + Reduce Output Operator [RS_276] PartitionCols:_col0 - Select Operator [SEL_51] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_129] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_49] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 23 [SIMPLE_EDGE] - <-Map 22 [CONTAINS] llap - Reduce Output Operator [RS_271] - PartitionCols:_col0 - Select Operator [SEL_269] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_268] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_267] (rows=25/25 width=89) - Output:["value"] - <-Map 24 [CONTAINS] llap - Reduce Output Operator [RS_276] - PartitionCols:_col0 - Select Operator [SEL_274] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_273] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_272] (rows=500/500 width=91) - Output:["value"] - <-Map 25 [CONTAINS] llap - Reduce Output Operator [RS_281] - PartitionCols:_col0 - Select Operator [SEL_279] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_278] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_277] (rows=500/500 width=91) - Output:["value"] - <-Map 26 [CONTAINS] llap - Reduce Output Operator [RS_286] - PartitionCols:_col0 - Select Operator [SEL_284] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_283] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_282] (rows=500/500 width=91) - Output:["value"] - Reduce Output Operator [RS_239] - Select Operator [SEL_234] (rows=2899/820 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_231] - File Output Operator [FS_235] - table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_231] - Reduce Output Operator [RS_240] - Select Operator [SEL_236] (rows=2899/820 width=178) - Output:["key","value"] + Select Operator [SEL_274] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_273] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_272] (rows=500/500 width=91) + Output:["value"] + <-Map 25 [CONTAINS] llap + Reduce Output Operator [RS_281] + PartitionCols:_col0 + Select Operator [SEL_279] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_278] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_277] (rows=500/500 width=91) + Output:["value"] + <-Map 26 [CONTAINS] llap + Reduce Output Operator [RS_286] + PartitionCols:_col0 + Select Operator [SEL_284] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_283] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_282] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_239] + Select Operator [SEL_234] (rows=2899/820 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_231] + File Output Operator [FS_235] + table:{"name:":"default.b_n10"} Please refer to the previous Select Operator [SEL_231] - File Output Operator [FS_237] - table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_231] - Reduce Output Operator [RS_241] - Select Operator [SEL_238] (rows=2899/820 width=178) - Output:["key","value"] + Reduce Output Operator [RS_240] + Select Operator [SEL_236] (rows=2899/820 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_231] + File Output Operator [FS_237] + table:{"name:":"default.c_n3"} Please refer to the previous Select Operator [SEL_231] - <-Reducer 3 [CONTAINS] llap - File Output Operator [FS_209] - table:{"name:":"default.a_n14"} - Select Operator [SEL_207] (rows=66/170 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_206] (rows=66/170 width=177) - Conds:RS_17._col3=Union 14._col0(Inner),Output:["_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_200] (rows=39/37 width=266) - Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] + Reduce Output Operator [RS_241] + Select Operator [SEL_238] (rows=2899/820 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_231] + <-Reducer 3 [CONTAINS] llap + File Output Operator [FS_209] + table:{"name:":"default.a_n14"} + Select Operator [SEL_207] (rows=66/170 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_206] (rows=66/170 width=177) + Conds:RS_17._col3=Union 14._col0(Inner),Output:["_col1","_col2"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_200] (rows=39/37 width=266) + Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_119] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_0] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_5] + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] llap + Reduce Output Operator [RS_246] PartitionCols:_col0 - Select Operator [SEL_2] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_119] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_0] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + Select Operator [SEL_244] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_243] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_242] (rows=25/25 width=89) + Output:["value"] + <-Map 15 [CONTAINS] llap + Reduce Output Operator [RS_251] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_5] - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] llap - Reduce Output Operator [RS_246] - PartitionCols:_col0 - Select Operator [SEL_244] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_243] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_242] (rows=25/25 width=89) - Output:["value"] - <-Map 15 [CONTAINS] llap - Reduce Output Operator [RS_251] - PartitionCols:_col0 - Select Operator [SEL_249] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_248] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_247] (rows=500/500 width=91) - Output:["value"] - Reduce Output Operator [RS_215] - Select Operator [SEL_210] (rows=2899/170 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_207] - File Output Operator [FS_211] - table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_207] - Reduce Output Operator [RS_216] - Select Operator [SEL_212] (rows=2899/170 width=178) - Output:["key","value"] + Select Operator [SEL_249] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_248] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_247] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_215] + Select Operator [SEL_210] (rows=2899/170 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_207] + File Output Operator [FS_211] + table:{"name:":"default.b_n10"} Please refer to the previous Select Operator [SEL_207] - File Output Operator [FS_213] - table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_207] - Reduce Output Operator [RS_217] - Select Operator [SEL_214] (rows=2899/170 width=178) - Output:["key","value"] + Reduce Output Operator [RS_216] + Select Operator [SEL_212] (rows=2899/170 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_207] + File Output Operator [FS_213] + table:{"name:":"default.c_n3"} Please refer to the previous Select Operator [SEL_207] - <-Reducer 9 [CONTAINS] llap - File Output Operator [FS_221] - table:{"name:":"default.a_n14"} - Select Operator [SEL_219] (rows=2640/5421 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_218] (rows=2640/5421 width=178) - Conds:RS_41._col1=Union 18._col0(Inner),Output:["_col0","_col3"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_201] (rows=791/1028 width=269) - Conds:RS_38._col0=RS_39._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_38] + Reduce Output Operator [RS_217] + Select Operator [SEL_214] (rows=2899/170 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_207] + <-Reducer 9 [CONTAINS] llap + File Output Operator [FS_221] + table:{"name:":"default.a_n14"} + Select Operator [SEL_219] (rows=2640/5421 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_218] (rows=2640/5421 width=178) + Conds:RS_41._col1=Union 18._col0(Inner),Output:["_col0","_col3"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_201] (rows=791/1028 width=269) + Conds:RS_38._col0=RS_39._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_38] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_123] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + Please refer to the previous TableScan [TS_0] + <-Map 16 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_124] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_24] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 18 [SIMPLE_EDGE] + <-Map 17 [CONTAINS] llap + Reduce Output Operator [RS_256] PartitionCols:_col0 - Select Operator [SEL_23] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_123] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_0] - <-Map 16 [SIMPLE_EDGE] llap - SHUFFLE [RS_39] + Select Operator [SEL_254] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_253] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_252] (rows=25/25 width=89) + Output:["value"] + <-Map 19 [CONTAINS] llap + Reduce Output Operator [RS_261] PartitionCols:_col0 - Select Operator [SEL_26] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_24] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 18 [SIMPLE_EDGE] - <-Map 17 [CONTAINS] llap - Reduce Output Operator [RS_256] - PartitionCols:_col0 - Select Operator [SEL_254] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_253] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_252] (rows=25/25 width=89) - Output:["value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_261] - PartitionCols:_col0 - Select Operator [SEL_259] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_258] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_257] (rows=500/500 width=91) - Output:["value"] - <-Map 20 [CONTAINS] llap - Reduce Output Operator [RS_266] - PartitionCols:_col0 - Select Operator [SEL_264] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_263] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_262] (rows=500/500 width=91) - Output:["value"] - Reduce Output Operator [RS_227] - Select Operator [SEL_222] (rows=2899/5421 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_219] - File Output Operator [FS_223] - table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_219] - Reduce Output Operator [RS_228] - Select Operator [SEL_224] (rows=2899/5421 width=178) - Output:["key","value"] + Select Operator [SEL_259] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_258] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_257] (rows=500/500 width=91) + Output:["value"] + <-Map 20 [CONTAINS] llap + Reduce Output Operator [RS_266] + PartitionCols:_col0 + Select Operator [SEL_264] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_263] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_262] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_227] + Select Operator [SEL_222] (rows=2899/5421 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_219] + File Output Operator [FS_223] + table:{"name:":"default.b_n10"} Please refer to the previous Select Operator [SEL_219] - File Output Operator [FS_225] - table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_219] - Reduce Output Operator [RS_229] - Select Operator [SEL_226] (rows=2899/5421 width=178) - Output:["key","value"] + Reduce Output Operator [RS_228] + Select Operator [SEL_224] (rows=2899/5421 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_219] + File Output Operator [FS_225] + table:{"name:":"default.c_n3"} Please refer to the previous Select Operator [SEL_219] + Reduce Output Operator [RS_229] + Select Operator [SEL_226] (rows=2899/5421 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_219] Reducer 6 llap File Output Operator [FS_89] - Group By Operator [GBY_87] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] + Select Operator [SEL_88] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_87] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] Reducer 7 llap File Output Operator [FS_97] - Group By Operator [GBY_95] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] + Select Operator [SEL_96] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_95] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] Stage-6 Stats Work{} Stage-1 @@ -1583,252 +1589,258 @@ Stage-5 Stage-3 Reducer 10 llap File Output Operator [FS_137] - Group By Operator [GBY_135] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_134] - Select Operator [SEL_133] (rows=2899/319 width=178) - Output:["key","value"] - Group By Operator [GBY_112] (rows=2899/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 15 [CONTAINS] llap - Reduce Output Operator [RS_260] - PartitionCols:_col0, _col1 - Select Operator [SEL_258] (rows=193/304 width=175) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_257] (rows=193/304 width=175) - Conds:RS_104._col1=RS_105._col0(Inner),Output:["_col0","_col3"] - <-Reducer 14 [SIMPLE_EDGE] llap - SHUFFLE [RS_104] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_242] (rows=39/115 width=264) - Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 13 [SIMPLE_EDGE] llap - SHUFFLE [RS_101] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 28 [SIMPLE_EDGE] llap - SHUFFLE [RS_102] - PartitionCols:_col0 - Select Operator [SEL_70] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_169] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_68] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 35 [SIMPLE_EDGE] llap - SHUFFLE [RS_105] - PartitionCols:_col0 - Select Operator [SEL_100] (rows=1525/319 width=91) - Output:["_col0"] - Group By Operator [GBY_99] (rows=1525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 34 [SIMPLE_EDGE] - <-Map 38 [CONTAINS] llap - Reduce Output Operator [RS_317] - PartitionCols:_col1, _col0 - Select Operator [SEL_315] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_314] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_313] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 33 [CONTAINS] llap - Reduce Output Operator [RS_302] - PartitionCols:_col1, _col0 - Select Operator [SEL_300] (rows=1025/319 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_299] (rows=1025/319 width=178) + Select Operator [SEL_136] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_135] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_134] + Select Operator [SEL_133] (rows=2899/319 width=178) + Output:["key","value"] + Group By Operator [GBY_112] (rows=2899/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 6 [SIMPLE_EDGE] + <-Reducer 15 [CONTAINS] llap + Reduce Output Operator [RS_260] + PartitionCols:_col0, _col1 + Select Operator [SEL_258] (rows=193/304 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_257] (rows=193/304 width=175) + Conds:RS_104._col1=RS_105._col0(Inner),Output:["_col0","_col3"] + <-Reducer 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_104] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_242] (rows=39/115 width=264) + Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_160] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 28 [SIMPLE_EDGE] llap + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_169] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_68] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 35 [SIMPLE_EDGE] llap + SHUFFLE [RS_105] + PartitionCols:_col0 + Select Operator [SEL_100] (rows=1525/319 width=91) + Output:["_col0"] + Group By Operator [GBY_99] (rows=1525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 34 [SIMPLE_EDGE] + <-Map 38 [CONTAINS] llap + Reduce Output Operator [RS_317] + PartitionCols:_col1, _col0 + Select Operator [SEL_315] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_314] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_313] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 33 [CONTAINS] llap + Reduce Output Operator [RS_302] + PartitionCols:_col1, _col0 + Select Operator [SEL_300] (rows=1025/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_299] (rows=1025/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 32 [SIMPLE_EDGE] + <-Map 37 [CONTAINS] llap + Reduce Output Operator [RS_312] + PartitionCols:_col1, _col0 + Select Operator [SEL_310] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_309] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_308] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 31 [CONTAINS] llap + Reduce Output Operator [RS_298] + PartitionCols:_col1, _col0 + Select Operator [SEL_296] (rows=525/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_295] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 30 [SIMPLE_EDGE] + <-Map 29 [CONTAINS] llap + Reduce Output Operator [RS_294] + PartitionCols:_col1, _col0 + Select Operator [SEL_292] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_291] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_290] (rows=25/25 width=175) + Output:["key","value"] + <-Map 36 [CONTAINS] llap + Reduce Output Operator [RS_307] + PartitionCols:_col1, _col0 + Select Operator [SEL_305] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_304] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_303] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 5 [CONTAINS] llap + Reduce Output Operator [RS_252] + PartitionCols:_col0, _col1 + Group By Operator [GBY_250] (rows=2706/309 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 4 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] llap + Reduce Output Operator [RS_256] + PartitionCols:_col0, _col1 + Select Operator [SEL_254] (rows=2640/1056 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_253] (rows=2640/1056 width=178) + Conds:RS_55._col1=RS_56._col0(Inner),Output:["_col0","_col3"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_55] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_241] (rows=791/1028 width=269) + Conds:RS_52._col0=RS_53._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_52] + PartitionCols:_col0 + Select Operator [SEL_27] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_163] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 20 [SIMPLE_EDGE] llap + SHUFFLE [RS_53] + PartitionCols:_col0 + Select Operator [SEL_30] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_164] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_28] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 25 [SIMPLE_EDGE] llap + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=1025/319 width=91) + Output:["_col0"] + Group By Operator [GBY_50] (rows=1025/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 32 [SIMPLE_EDGE] - <-Map 37 [CONTAINS] llap - Reduce Output Operator [RS_312] + <-Union 24 [SIMPLE_EDGE] + <-Map 27 [CONTAINS] llap + Reduce Output Operator [RS_289] PartitionCols:_col1, _col0 - Select Operator [SEL_310] (rows=500/500 width=178) + Select Operator [SEL_287] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_309] (rows=500/500 width=178) + Filter Operator [FIL_286] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_308] (rows=500/500 width=178) + TableScan [TS_285] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 31 [CONTAINS] llap - Reduce Output Operator [RS_298] + <-Reducer 23 [CONTAINS] llap + Reduce Output Operator [RS_279] PartitionCols:_col1, _col0 - Select Operator [SEL_296] (rows=525/319 width=178) + Select Operator [SEL_277] (rows=525/319 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_295] (rows=525/319 width=178) + Group By Operator [GBY_276] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 30 [SIMPLE_EDGE] - <-Map 29 [CONTAINS] llap - Reduce Output Operator [RS_294] + <-Union 22 [SIMPLE_EDGE] + <-Map 21 [CONTAINS] llap + Reduce Output Operator [RS_275] PartitionCols:_col1, _col0 - Select Operator [SEL_292] (rows=25/25 width=175) + Select Operator [SEL_273] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_291] (rows=25/25 width=175) + Filter Operator [FIL_272] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_290] (rows=25/25 width=175) + TableScan [TS_271] (rows=25/25 width=175) Output:["key","value"] - <-Map 36 [CONTAINS] llap - Reduce Output Operator [RS_307] + <-Map 26 [CONTAINS] llap + Reduce Output Operator [RS_284] PartitionCols:_col1, _col0 - Select Operator [SEL_305] (rows=500/500 width=178) + Select Operator [SEL_282] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_304] (rows=500/500 width=178) + Filter Operator [FIL_281] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_303] (rows=500/500 width=178) + TableScan [TS_280] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 5 [CONTAINS] llap - Reduce Output Operator [RS_252] - PartitionCols:_col0, _col1 - Group By Operator [GBY_250] (rows=2706/309 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] llap - Reduce Output Operator [RS_256] - PartitionCols:_col0, _col1 - Select Operator [SEL_254] (rows=2640/1056 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_253] (rows=2640/1056 width=178) - Conds:RS_55._col1=RS_56._col0(Inner),Output:["_col0","_col3"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_55] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_241] (rows=791/1028 width=269) - Conds:RS_52._col0=RS_53._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_52] - PartitionCols:_col0 - Select Operator [SEL_27] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_163] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 20 [SIMPLE_EDGE] llap - SHUFFLE [RS_53] - PartitionCols:_col0 - Select Operator [SEL_30] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_28] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 25 [SIMPLE_EDGE] llap - SHUFFLE [RS_56] - PartitionCols:_col0 - Select Operator [SEL_51] (rows=1025/319 width=91) - Output:["_col0"] - Group By Operator [GBY_50] (rows=1025/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 24 [SIMPLE_EDGE] - <-Map 27 [CONTAINS] llap - Reduce Output Operator [RS_289] - PartitionCols:_col1, _col0 - Select Operator [SEL_287] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_286] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_285] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 23 [CONTAINS] llap - Reduce Output Operator [RS_279] - PartitionCols:_col1, _col0 - Select Operator [SEL_277] (rows=525/319 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_276] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 22 [SIMPLE_EDGE] - <-Map 21 [CONTAINS] llap - Reduce Output Operator [RS_275] - PartitionCols:_col1, _col0 - Select Operator [SEL_273] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_272] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_271] (rows=25/25 width=175) - Output:["key","value"] - <-Map 26 [CONTAINS] llap - Reduce Output Operator [RS_284] - PartitionCols:_col1, _col0 - Select Operator [SEL_282] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_281] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_280] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 3 [CONTAINS] llap - Reduce Output Operator [RS_249] - PartitionCols:_col0, _col1 - Select Operator [SEL_247] (rows=66/61 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_246] (rows=66/61 width=177) - Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col2"] - <-Reducer 18 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_17] (rows=525/319 width=91) - Output:["_col0"] - Group By Operator [GBY_16] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 17 [SIMPLE_EDGE] - <-Map 16 [CONTAINS] llap - Reduce Output Operator [RS_265] - PartitionCols:_col1, _col0 - Select Operator [SEL_263] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_262] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_261] (rows=25/25 width=175) - Output:["key","value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_270] - PartitionCols:_col1, _col0 - Select Operator [SEL_268] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_267] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_266] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_240] (rows=39/37 width=266) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=500/500 width=178) - predicate:key is not null - Please refer to the previous TableScan [TS_0] - <-Map 13 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_5] + <-Reducer 3 [CONTAINS] llap + Reduce Output Operator [RS_249] + PartitionCols:_col0, _col1 + Select Operator [SEL_247] (rows=66/61 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_246] (rows=66/61 width=177) + Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col2"] + <-Reducer 18 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=525/319 width=91) + Output:["_col0"] + Group By Operator [GBY_16] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 17 [SIMPLE_EDGE] + <-Map 16 [CONTAINS] llap + Reduce Output Operator [RS_265] + PartitionCols:_col1, _col0 + Select Operator [SEL_263] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_262] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_261] (rows=25/25 width=175) + Output:["key","value"] + <-Map 19 [CONTAINS] llap + Reduce Output Operator [RS_270] + PartitionCols:_col1, _col0 + Select Operator [SEL_268] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_267] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_266] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_240] (rows=39/37 width=266) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_159] (rows=500/500 width=178) + predicate:key is not null + Please refer to the previous TableScan [TS_0] + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_19] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_5] Reducer 8 llap File Output Operator [FS_121] - Group By Operator [GBY_119] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_118] - Select Operator [SEL_117] (rows=2899/319 width=178) - Output:["key","value"] - Please refer to the previous Group By Operator [GBY_112] + Select Operator [SEL_120] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_119] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_118] + Select Operator [SEL_117] (rows=2899/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] Reducer 9 llap File Output Operator [FS_129] - Group By Operator [GBY_127] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_126] - Select Operator [SEL_125] (rows=2899/319 width=178) - Output:["key","value"] - Please refer to the previous Group By Operator [GBY_112] + Select Operator [SEL_128] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_127] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_126] + Select Operator [SEL_125] (rows=2899/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] Stage-6 Stats Work{} Stage-1 @@ -1916,57 +1928,61 @@ Stage-4 Stage-2 Reducer 6 llap File Output Operator [FS_24] - Group By Operator [GBY_22] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_18] - table:{"name:":"default.dest1_n105"} - Select Operator [SEL_16] (rows=316/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=316/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_11] (rows=501/310 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_44] - PartitionCols:_col0, _col1 - Select Operator [SEL_42] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_41] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_40] - PartitionCols:_col0, _col1 - Select Operator [SEL_38] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count()"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - PARTITION_ONLY_SHUFFLE [RS_21] - Select Operator [SEL_20] (rows=316/310 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_16] + Select Operator [SEL_23] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_22] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_18] + table:{"name:":"default.dest1_n105"} + Select Operator [SEL_16] (rows=316/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_15] (rows=316/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=501/310 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_44] + PartitionCols:_col0, _col1 + Select Operator [SEL_42] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_41] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_40] + PartitionCols:_col0, _col1 + Select Operator [SEL_38] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_37] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + PARTITION_ONLY_SHUFFLE [RS_21] + Select Operator [SEL_20] (rows=316/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] Reducer 7 llap File Output Operator [FS_36] - Group By Operator [GBY_34] (rows=1/1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_33] - Select Operator [SEL_32] (rows=501/310 width=456) - Output:["key","val1","val2"] - Select Operator [SEL_28] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_27] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_11] + Select Operator [SEL_35] (rows=1/1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_34] (rows=1/1 width=492) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector(VALUE._col12, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_33] + Select Operator [SEL_32] (rows=501/310 width=456) + Output:["key","val1","val2"] + Select Operator [SEL_28] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_27] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_11] Stage-5 Stats Work{} Stage-1 @@ -2128,71 +2144,75 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_22] - Group By Operator [GBY_20] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_16] - table:{"name:":"default.dest1_n105"} - Select Operator [SEL_14] (rows=316/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_13] (rows=316/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_43] - PartitionCols:_col0 - Select Operator [SEL_41] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_40] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_44] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_41] - <-Map 9 [CONTAINS] llap - Reduce Output Operator [RS_48] - PartitionCols:_col0 - Select Operator [SEL_46] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_45] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_49] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_46] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_38] - PartitionCols:_col0 - Select Operator [SEL_36] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_35] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count()"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_39] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_36] - PARTITION_ONLY_SHUFFLE [RS_19] - Select Operator [SEL_18] (rows=316/310 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_14] + Select Operator [SEL_21] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_20] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_16] + table:{"name:":"default.dest1_n105"} + Select Operator [SEL_14] (rows=316/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_13] (rows=316/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_43] + PartitionCols:_col0 + Select Operator [SEL_41] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_40] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_44] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_41] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_48] + PartitionCols:_col0 + Select Operator [SEL_46] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_45] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_49] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_46] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_36] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_35] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_39] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_36] + PARTITION_ONLY_SHUFFLE [RS_19] + Select Operator [SEL_18] (rows=316/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_14] Reducer 7 llap File Output Operator [FS_34] - Group By Operator [GBY_32] (rows=1/1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_28] - table:{"name:":"default.dest2_n29"} - Select Operator [SEL_26] (rows=1001/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_25] (rows=1001/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_31] - Select Operator [SEL_30] (rows=1001/310 width=456) - Output:["key","val1","val2"] - Please refer to the previous Select Operator [SEL_26] + Select Operator [SEL_33] (rows=1/1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_32] (rows=1/1 width=492) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector(VALUE._col12, 'hll')"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_28] + table:{"name:":"default.dest2_n29"} + Select Operator [SEL_26] (rows=1001/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_25] (rows=1001/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_31] + Select Operator [SEL_30] (rows=1001/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_26] Stage-5 Stats Work{} Stage-1 @@ -2270,61 +2290,65 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_20] - Group By Operator [GBY_18] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_14] - table:{"name:":"default.dest1_n105"} - Select Operator [SEL_12] (rows=316/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_11] (rows=316/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_41] - PartitionCols:_col0 - Select Operator [SEL_39] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_38] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_42] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_39] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_36] - PartitionCols:_col0 - Select Operator [SEL_34] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_33] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count()"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_37] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_34] - PARTITION_ONLY_SHUFFLE [RS_17] - Select Operator [SEL_16] (rows=316/310 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_12] + Select Operator [SEL_19] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_18] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_14] + table:{"name:":"default.dest1_n105"} + Select Operator [SEL_12] (rows=316/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_11] (rows=316/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_41] + PartitionCols:_col0 + Select Operator [SEL_39] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_38] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_39] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_33] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_37] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_34] + PARTITION_ONLY_SHUFFLE [RS_17] + Select Operator [SEL_16] (rows=316/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_12] Reducer 7 llap File Output Operator [FS_32] - Group By Operator [GBY_30] (rows=1/1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_26] - table:{"name:":"default.dest2_n29"} - Select Operator [SEL_24] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_23] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - Select Operator [SEL_28] (rows=501/310 width=456) - Output:["key","val1","val2"] - Please refer to the previous Select Operator [SEL_24] + Select Operator [SEL_31] (rows=1/1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_30] (rows=1/1 width=492) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector(VALUE._col12, 'hll')"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_26] + table:{"name:":"default.dest2_n29"} + Select Operator [SEL_24] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_23] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_29] + Select Operator [SEL_28] (rows=501/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_24] Stage-5 Stats Work{} Stage-1 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index f10356ab47..e9d66b9e6f 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -64,10 +64,10 @@ Stage-3 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=1 width=1077) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_8] (rows=1 width=1077) - Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1 + Select Operator [SEL_9] (rows=1 width=727) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Group By Operator [GBY_8] (rows=1 width=521) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap File Output Operator [FS_3] table:{"name:":"default.src_orc_merge_test_part_n1"} @@ -77,8 +77,8 @@ Stage-3 default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=1 width=1061) - Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"],keys:ds, ts + Group By Operator [GBY_6] (rows=1 width=589) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["min(key)","max(key)","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"],keys:ds, ts Select Operator [SEL_5] (rows=500 width=292) Output:["key","value","ds","ts"] Please refer to the previous Select Operator [SEL_1] @@ -117,10 +117,10 @@ Stage-3 Stage-1 Reducer 3 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=1 width=1077) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_12] (rows=1 width=1077) - Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1 + Select Operator [SEL_13] (rows=1 width=727) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Group By Operator [GBY_12] (rows=1 width=521) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap File Output Operator [FS_7] table:{"name:":"default.src_orc_merge_test_part_n1"} @@ -140,8 +140,8 @@ Stage-3 default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] SHUFFLE [RS_11] PartitionCols:_col0, _col1 - Group By Operator [GBY_10] (rows=1 width=1061) - Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"],keys:ds, ts + Group By Operator [GBY_10] (rows=1 width=589) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["min(key)","max(key)","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"],keys:ds, ts Select Operator [SEL_9] (rows=100 width=292) Output:["key","value","ds","ts"] Please refer to the previous Select Operator [SEL_6] @@ -3735,35 +3735,37 @@ Stage-3 Stage-1 Reducer 4 llap File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_8] - table:{"name:":"default.nzhang_CTAS1_n1"} - Limit [LIM_7] (rows=10 width=178) - Number of rows:10 - Select Operator [SEL_6] (rows=10 width=178) - Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_5] - Top N Key Operator [TNK_18] (rows=10 width=178) - keys:_col0, _col1,top n:10 - Limit [LIM_4] (rows=10 width=178) - Number of rows:10 - Select Operator [SEL_3] (rows=500 width=178) - Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] - Top N Key Operator [TNK_19] (rows=500 width=178) - keys:key, value,top n:10 - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_14] - Select Operator [SEL_13] (rows=10 width=178) - Output:["col1","col2"] - Please refer to the previous Limit [LIM_7] + Select Operator [SEL_16] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_15] (rows=1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_8] + table:{"name:":"default.nzhang_CTAS1_n1"} + Limit [LIM_7] (rows=10 width=178) + Number of rows:10 + Select Operator [SEL_6] (rows=10 width=178) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_5] + Top N Key Operator [TNK_18] (rows=10 width=178) + keys:_col0, _col1,top n:10 + Limit [LIM_4] (rows=10 width=178) + Number of rows:10 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_19] (rows=500 width=178) + keys:key, value,top n:10 + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_14] + Select Operator [SEL_13] (rows=10 width=178) + Output:["col1","col2"] + Please refer to the previous Limit [LIM_7] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -3806,35 +3808,37 @@ Stage-3 Stage-1 Reducer 4 llap File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_8] - table:{"name:":"default.nzhang_ctas3_n1"} - Limit [LIM_7] (rows=10 width=192) - Number of rows:10 - Select Operator [SEL_6] (rows=10 width=192) - Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_5] - Top N Key Operator [TNK_18] (rows=10 width=192) - keys:_col0, _col1,top n:10 - Limit [LIM_4] (rows=10 width=192) - Number of rows:10 - Select Operator [SEL_3] (rows=500 width=192) - Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500 width=192) - Output:["_col0","_col1"] - Top N Key Operator [TNK_19] (rows=500 width=178) - keys:(key / 2), concat(value, '_con'),top n:10 - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_14] - Select Operator [SEL_13] (rows=10 width=192) - Output:["col1","col2"] - Please refer to the previous Limit [LIM_7] + Select Operator [SEL_16] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_15] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["min(VALUE._col0)","max(VALUE._col0)","count(VALUE._col1)","compute_bit_vector(VALUE._col0, 'hll')","max(VALUE._col3)","avg(VALUE._col4)","count(VALUE._col5)","compute_bit_vector(VALUE._col6, 'hll')"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_8] + table:{"name:":"default.nzhang_ctas3_n1"} + Limit [LIM_7] (rows=10 width=192) + Number of rows:10 + Select Operator [SEL_6] (rows=10 width=192) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_5] + Top N Key Operator [TNK_18] (rows=10 width=192) + keys:_col0, _col1,top n:10 + Limit [LIM_4] (rows=10 width=192) + Number of rows:10 + Select Operator [SEL_3] (rows=500 width=192) + Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500 width=192) + Output:["_col0","_col1"] + Top N Key Operator [TNK_19] (rows=500 width=178) + keys:(key / 2), concat(value, '_con'),top n:10 + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_14] + Select Operator [SEL_13] (rows=10 width=192) + Output:["col1","col2"] + Please refer to the previous Limit [LIM_7] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -5571,73 +5575,77 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=2640) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_14] - Group By Operator [GBY_13] (rows=1 width=2576) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')","compute_stats(VALUE._col6, 'hll')"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_9] - table:{"name:":"default.part_4_n1"} - Select Operator [SEL_7] (rows=26 width=239) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_6] (rows=26 width=499) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}] - Select Operator [SEL_5] (rows=26 width=499) - Output:["_col1","_col2","_col5","_col7"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col2 - PTF Operator [PTF_3] (rows=26 width=499) - Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}}] - Select Operator [SEL_2] (rows=26 width=499) - Output:["_col1","_col2","_col5","_col7"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_1] - PartitionCols:p_mfgr - TableScan [TS_0] (rows=26 width=231) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"] - PARTITION_ONLY_SHUFFLE [RS_12] - PartitionCols:rand() - Select Operator [SEL_11] (rows=26 width=239) - Output:["p_mfgr","p_name","p_size","r","dr","s"] - Please refer to the previous Select Operator [SEL_7] + Select Operator [SEL_16] (rows=1 width=1590) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35"] + Group By Operator [GBY_15] (rows=1 width=976) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)","min(VALUE._col8)","max(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col11)","min(VALUE._col12)","max(VALUE._col13)","count(VALUE._col14)","compute_bit_vector(VALUE._col15)","min(VALUE._col16)","max(VALUE._col17)","count(VALUE._col18)","compute_bit_vector(VALUE._col19)","min(VALUE._col20)","max(VALUE._col21)","count(VALUE._col22)","compute_bit_vector(VALUE._col23)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_14] + Group By Operator [GBY_13] (rows=1 width=1112) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')","min(VALUE._col9)","max(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col9, 'hll')","min(VALUE._col11)","max(VALUE._col11)","count(VALUE._col12)","compute_bit_vector(VALUE._col11, 'hll')","min(VALUE._col13)","max(VALUE._col13)","count(VALUE._col14)","compute_bit_vector(VALUE._col13, 'hll')","min(VALUE._col15)","max(VALUE._col15)","count(VALUE._col16)","compute_bit_vector(VALUE._col15, 'hll')"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_9] + table:{"name:":"default.part_4_n1"} + Select Operator [SEL_7] (rows=26 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_6] (rows=26 width=499) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}] + Select Operator [SEL_5] (rows=26 width=499) + Output:["_col1","_col2","_col5","_col7"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col2 + PTF Operator [PTF_3] (rows=26 width=499) + Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}}] + Select Operator [SEL_2] (rows=26 width=499) + Output:["_col1","_col2","_col5","_col7"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_1] + PartitionCols:p_mfgr + TableScan [TS_0] (rows=26 width=231) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"] + PARTITION_ONLY_SHUFFLE [RS_12] + PartitionCols:rand() + Select Operator [SEL_11] (rows=26 width=239) + Output:["p_mfgr","p_name","p_size","r","dr","s"] + Please refer to the previous Select Operator [SEL_7] Reducer 9 llap File Output Operator [FS_36] - Group By Operator [GBY_34] (rows=1 width=3520) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)","compute_stats(VALUE._col6)","compute_stats(VALUE._col7)"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_33] - Group By Operator [GBY_32] (rows=1 width=3424) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')","compute_stats(VALUE._col6, 'hll')","compute_stats(VALUE._col7, 'hll')","compute_stats(VALUE._col8, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_28] - table:{"name:":"default.part_5_n1"} - Select Operator [SEL_25] (rows=26 width=247) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - PTF Operator [PTF_24] (rows=26 width=499) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col2 ASC NULLS LAST","partition by:":"_col3"}] - Select Operator [SEL_23] (rows=26 width=499) - Output:["_col0","_col2","_col3","_col6"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - PartitionCols:_col2 - Select Operator [SEL_21] (rows=26 width=491) - Output:["sum_window_0","_col1","_col2","_col5"] - PTF Operator [PTF_20] (rows=26 width=491) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS LAST","partition by:":"_col2"}] - Select Operator [SEL_19] (rows=26 width=491) - Output:["_col1","_col2","_col5"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col2 - Please refer to the previous PTF Operator [PTF_3] - PARTITION_ONLY_SHUFFLE [RS_31] - PartitionCols:rand() - Select Operator [SEL_30] (rows=26 width=247) - Output:["p_mfgr","p_name","p_size","s2","r","dr","cud","fv1"] - Please refer to the previous Select Operator [SEL_25] + Select Operator [SEL_35] (rows=1 width=2118) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47"] + Group By Operator [GBY_34] (rows=1 width=1296) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)","min(VALUE._col8)","max(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col11)","min(VALUE._col12)","max(VALUE._col13)","count(VALUE._col14)","compute_bit_vector(VALUE._col15)","min(VALUE._col16)","max(VALUE._col17)","count(VALUE._col18)","compute_bit_vector(VALUE._col19)","min(VALUE._col20)","max(VALUE._col21)","count(VALUE._col22)","compute_bit_vector(VALUE._col23)","min(VALUE._col24)","max(VALUE._col25)","count(VALUE._col26)","compute_bit_vector(VALUE._col27)","min(VALUE._col28)","max(VALUE._col29)","count(VALUE._col30)","compute_bit_vector(VALUE._col31)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_33] + Group By Operator [GBY_32] (rows=1 width=1432) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')","min(VALUE._col9)","max(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col9, 'hll')","min(VALUE._col11)","max(VALUE._col11)","count(VALUE._col12)","compute_bit_vector(VALUE._col11, 'hll')","min(VALUE._col13)","max(VALUE._col13)","count(VALUE._col14)","compute_bit_vector(VALUE._col13, 'hll')","min(VALUE._col15)","max(VALUE._col15)","count(VALUE._col16)","compute_bit_vector(VALUE._col15, 'hll')","min(VALUE._col17)","max(VALUE._col17)","count(VALUE._col18)","compute_bit_vector(VALUE._col17, 'hll')","min(VALUE._col19)","max(VALUE._col19)","count(VALUE._col20)","compute_bit_vector(VALUE._col19, 'hll')"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_28] + table:{"name:":"default.part_5_n1"} + Select Operator [SEL_25] (rows=26 width=247) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + PTF Operator [PTF_24] (rows=26 width=499) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col2 ASC NULLS LAST","partition by:":"_col3"}] + Select Operator [SEL_23] (rows=26 width=499) + Output:["_col0","_col2","_col3","_col6"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col2 + Select Operator [SEL_21] (rows=26 width=491) + Output:["sum_window_0","_col1","_col2","_col5"] + PTF Operator [PTF_20] (rows=26 width=491) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS LAST","partition by:":"_col2"}] + Select Operator [SEL_19] (rows=26 width=491) + Output:["_col1","_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col2 + Please refer to the previous PTF Operator [PTF_3] + PARTITION_ONLY_SHUFFLE [RS_31] + PartitionCols:rand() + Select Operator [SEL_30] (rows=26 width=247) + Output:["p_mfgr","p_name","p_size","s2","r","dr","cud","fv1"] + Please refer to the previous Select Operator [SEL_25] Stage-5 Stats Work{} Stage-1 @@ -6039,42 +6047,44 @@ Stage-3 Stage-1 Reducer 4 llap File Output Operator [FS_19] - Group By Operator [GBY_17] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_16] - Group By Operator [GBY_15] (rows=1 width=864) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_11] - table:{"name:":"default.dest_j1_n16"} - Select Operator [SEL_9] (rows=791 width=95) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_34] (rows=791 width=178) - Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500 width=87) - Output:["_col0"] - Filter Operator [FIL_22] (rows=500 width=87) - predicate:key is not null - TableScan [TS_0] (rows=500 width=87) - default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=500 width=178) - predicate:key is not null - TableScan [TS_3] (rows=500 width=178) - default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_14] - PartitionCols:rand() - Select Operator [SEL_13] (rows=791 width=95) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_9] + Select Operator [SEL_18] (rows=1 width=530) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_17] (rows=1 width=324) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_16] + Group By Operator [GBY_15] (rows=1 width=392) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["min(VALUE._col0)","max(VALUE._col0)","count(VALUE._col1)","compute_bit_vector(VALUE._col0, 'hll')","max(VALUE._col3)","avg(VALUE._col4)","count(VALUE._col5)","compute_bit_vector(VALUE._col6, 'hll')"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_11] + table:{"name:":"default.dest_j1_n16"} + Select Operator [SEL_9] (rows=791 width=95) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_34] (rows=791 width=178) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500 width=87) + Output:["_col0"] + Filter Operator [FIL_22] (rows=500 width=87) + predicate:key is not null + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_14] + PartitionCols:rand() + Select Operator [SEL_13] (rows=791 width=95) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_9] PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1_n16 select src1.key, src2.value diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index f7ef019f5b..fa16e1a9ee 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -2767,359 +2767,365 @@ Stage-5 Stage-3 Reducer 4 llap File Output Operator [FS_82] - Group By Operator [GBY_80] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Union 3 [CUSTOM_SIMPLE_EDGE] - <-Map 18 [CONTAINS] llap - File Output Operator [FS_286] - table:{"name:":"default.a_n19"} - Select Operator [SEL_283] (rows=1844 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_281] (rows=1844 width=10) - Conds:MAPJOIN_280._col1=RS_396._col0(Inner),Output:["_col1","_col4"] - <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_396] - PartitionCols:_col0 - Select Operator [SEL_395] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_394] (rows=25 width=7) - predicate:key is not null - TableScan [TS_63] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_280] (rows=1677 width=10) - Conds:SEL_282._col0=RS_367._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_367] - PartitionCols:_col1 - Select Operator [SEL_365] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_364] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_282] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_279] (rows=25 width=7) - predicate:value is not null - TableScan [TS_276] (rows=25 width=7) - Output:["value"] - Reduce Output Operator [RS_295] - Group By Operator [GBY_292] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_287] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_283] - File Output Operator [FS_288] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_283] - Reduce Output Operator [RS_296] - Group By Operator [GBY_293] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_289] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_283] - File Output Operator [FS_290] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_283] - Reduce Output Operator [RS_297] - Group By Operator [GBY_294] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_291] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_283] - <-Map 19 [CONTAINS] llap - File Output Operator [FS_308] - table:{"name:":"default.a_n19"} - Select Operator [SEL_305] (rows=1844 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_303] (rows=1844 width=10) - Conds:MAPJOIN_302._col1=RS_397._col0(Inner),Output:["_col1","_col4"] - <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_397] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_395] - <-Map Join Operator [MAPJOIN_302] (rows=1677 width=10) - Conds:SEL_304._col0=RS_368._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_368] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_304] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_301] (rows=500 width=10) - predicate:value is not null - TableScan [TS_298] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_317] - Group By Operator [GBY_314] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_309] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_305] - File Output Operator [FS_310] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_305] - Reduce Output Operator [RS_318] - Group By Operator [GBY_315] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_311] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_305] - File Output Operator [FS_312] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_305] - Reduce Output Operator [RS_319] - Group By Operator [GBY_316] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_313] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_305] - <-Map 20 [CONTAINS] llap - File Output Operator [FS_330] - table:{"name:":"default.a_n19"} - Select Operator [SEL_327] (rows=1844 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_325] (rows=1844 width=10) - Conds:MAPJOIN_324._col1=RS_398._col0(Inner),Output:["_col1","_col4"] - <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_398] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_395] - <-Map Join Operator [MAPJOIN_324] (rows=1677 width=10) - Conds:SEL_326._col0=RS_369._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_369] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_326] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_323] (rows=500 width=10) - predicate:value is not null - TableScan [TS_320] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_339] - Group By Operator [GBY_336] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_331] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_327] - File Output Operator [FS_332] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_327] - Reduce Output Operator [RS_340] - Group By Operator [GBY_337] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_333] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_327] - File Output Operator [FS_334] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_327] - Reduce Output Operator [RS_341] - Group By Operator [GBY_338] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_335] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_327] - <-Map 21 [CONTAINS] llap - File Output Operator [FS_352] - table:{"name:":"default.a_n19"} - Select Operator [SEL_349] (rows=1844 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_347] (rows=1844 width=10) - Conds:MAPJOIN_346._col1=RS_399._col0(Inner),Output:["_col1","_col4"] - <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_399] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_395] - <-Map Join Operator [MAPJOIN_346] (rows=1677 width=10) - Conds:SEL_348._col0=RS_370._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_370] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_348] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_345] (rows=500 width=10) - predicate:value is not null - TableScan [TS_342] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_361] - Group By Operator [GBY_358] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_353] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_349] - File Output Operator [FS_354] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_349] - Reduce Output Operator [RS_362] - Group By Operator [GBY_359] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_355] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_349] - File Output Operator [FS_356] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_349] - Reduce Output Operator [RS_363] - Group By Operator [GBY_360] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_357] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_349] - <-Reducer 2 [CONTAINS] llap - File Output Operator [FS_224] - table:{"name:":"default.a_n19"} - Select Operator [SEL_222] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_221] (rows=605 width=10) - Conds:RS_375._col3=Union 11._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_375] - PartitionCols:_col3 - Map Join Operator [MAPJOIN_373] (rows=550 width=10) - Conds:SEL_372._col0=RS_366._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_366] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_372] (rows=500 width=10) + Select Operator [SEL_81] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_80] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Union 3 [CUSTOM_SIMPLE_EDGE] + <-Map 18 [CONTAINS] llap + File Output Operator [FS_286] + table:{"name:":"default.a_n19"} + Select Operator [SEL_283] (rows=1844 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_281] (rows=1844 width=10) + Conds:MAPJOIN_280._col1=RS_396._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_396] + PartitionCols:_col0 + Select Operator [SEL_395] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_371] (rows=500 width=10) + Filter Operator [FIL_394] (rows=25 width=7) predicate:key is not null - TableScan [TS_0] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 11 [SIMPLE_EDGE] - <-Map 10 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_381] - PartitionCols:_col0 - Select Operator [SEL_380] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_379] (rows=25 width=7) - predicate:value is not null - TableScan [TS_251] (rows=25 width=7) - Output:["value"] - <-Map 12 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_384] - PartitionCols:_col0 - Select Operator [SEL_383] (rows=500 width=10) + TableScan [TS_63] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_280] (rows=1677 width=10) + Conds:SEL_282._col0=RS_367._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_367] + PartitionCols:_col1 + Select Operator [SEL_365] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_364] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_282] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_382] (rows=500 width=10) + Filter Operator [FIL_279] (rows=25 width=7) predicate:value is not null - TableScan [TS_256] (rows=500 width=10) + TableScan [TS_276] (rows=25 width=7) Output:["value"] - Reduce Output Operator [RS_233] - Group By Operator [GBY_230] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_225] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_222] - File Output Operator [FS_226] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_222] - Reduce Output Operator [RS_234] - Group By Operator [GBY_231] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_227] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_222] - File Output Operator [FS_228] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_222] - Reduce Output Operator [RS_235] - Group By Operator [GBY_232] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_229] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_222] - <-Reducer 8 [CONTAINS] llap - File Output Operator [FS_239] - table:{"name:":"default.a_n19"} - Select Operator [SEL_237] (rows=1127 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_236] (rows=1127 width=10) - Conds:RS_41._col3=Union 15._col0(Inner),Output:["_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_207] (rows=550 width=10) - Conds:RS_374._col0=RS_378._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_374] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_372] - <-Map 13 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_378] - PartitionCols:_col0 - Select Operator [SEL_377] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_376] (rows=500 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_24] (rows=500 width=10) - default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 15 [SIMPLE_EDGE] - <-Map 14 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_387] + Reduce Output Operator [RS_295] + Group By Operator [GBY_292] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_287] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_283] + File Output Operator [FS_288] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_283] + Reduce Output Operator [RS_296] + Group By Operator [GBY_293] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_289] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_283] + File Output Operator [FS_290] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_283] + Reduce Output Operator [RS_297] + Group By Operator [GBY_294] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_291] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_283] + <-Map 19 [CONTAINS] llap + File Output Operator [FS_308] + table:{"name:":"default.a_n19"} + Select Operator [SEL_305] (rows=1844 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_303] (rows=1844 width=10) + Conds:MAPJOIN_302._col1=RS_397._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_397] PartitionCols:_col0 - Select Operator [SEL_386] (rows=25 width=7) + Please refer to the previous Select Operator [SEL_395] + <-Map Join Operator [MAPJOIN_302] (rows=1677 width=10) + Conds:SEL_304._col0=RS_368._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_368] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_304] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_385] (rows=25 width=7) + Filter Operator [FIL_301] (rows=500 width=10) predicate:value is not null - TableScan [TS_261] (rows=25 width=7) + TableScan [TS_298] (rows=500 width=10) Output:["value"] - <-Map 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_390] + Reduce Output Operator [RS_317] + Group By Operator [GBY_314] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_309] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_305] + File Output Operator [FS_310] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_305] + Reduce Output Operator [RS_318] + Group By Operator [GBY_315] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_311] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_305] + File Output Operator [FS_312] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_305] + Reduce Output Operator [RS_319] + Group By Operator [GBY_316] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_313] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_305] + <-Map 20 [CONTAINS] llap + File Output Operator [FS_330] + table:{"name:":"default.a_n19"} + Select Operator [SEL_327] (rows=1844 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_325] (rows=1844 width=10) + Conds:MAPJOIN_324._col1=RS_398._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_398] PartitionCols:_col0 - Select Operator [SEL_389] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_395] + <-Map Join Operator [MAPJOIN_324] (rows=1677 width=10) + Conds:SEL_326._col0=RS_369._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_369] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_326] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_388] (rows=500 width=10) + Filter Operator [FIL_323] (rows=500 width=10) predicate:value is not null - TableScan [TS_266] (rows=500 width=10) + TableScan [TS_320] (rows=500 width=10) Output:["value"] - <-Map 17 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_393] + Reduce Output Operator [RS_339] + Group By Operator [GBY_336] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_331] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_327] + File Output Operator [FS_332] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_327] + Reduce Output Operator [RS_340] + Group By Operator [GBY_337] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_333] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_327] + File Output Operator [FS_334] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_327] + Reduce Output Operator [RS_341] + Group By Operator [GBY_338] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_335] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_327] + <-Map 21 [CONTAINS] llap + File Output Operator [FS_352] + table:{"name:":"default.a_n19"} + Select Operator [SEL_349] (rows=1844 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_347] (rows=1844 width=10) + Conds:MAPJOIN_346._col1=RS_399._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_399] PartitionCols:_col0 - Select Operator [SEL_392] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_395] + <-Map Join Operator [MAPJOIN_346] (rows=1677 width=10) + Conds:SEL_348._col0=RS_370._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_370] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_348] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_391] (rows=500 width=10) + Filter Operator [FIL_345] (rows=500 width=10) predicate:value is not null - TableScan [TS_271] (rows=500 width=10) + TableScan [TS_342] (rows=500 width=10) Output:["value"] - Reduce Output Operator [RS_248] - Group By Operator [GBY_245] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_240] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_237] - File Output Operator [FS_241] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_237] - Reduce Output Operator [RS_249] - Group By Operator [GBY_246] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_242] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_237] - File Output Operator [FS_243] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_237] - Reduce Output Operator [RS_250] - Group By Operator [GBY_247] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_244] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_237] + Reduce Output Operator [RS_361] + Group By Operator [GBY_358] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_353] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_349] + File Output Operator [FS_354] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_349] + Reduce Output Operator [RS_362] + Group By Operator [GBY_359] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_355] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_349] + File Output Operator [FS_356] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_349] + Reduce Output Operator [RS_363] + Group By Operator [GBY_360] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_357] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_349] + <-Reducer 2 [CONTAINS] llap + File Output Operator [FS_224] + table:{"name:":"default.a_n19"} + Select Operator [SEL_222] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_221] (rows=605 width=10) + Conds:RS_375._col3=Union 11._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_375] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_373] (rows=550 width=10) + Conds:SEL_372._col0=RS_366._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_366] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_372] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_371] (rows=500 width=10) + predicate:key is not null + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Union 11 [SIMPLE_EDGE] + <-Map 10 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_381] + PartitionCols:_col0 + Select Operator [SEL_380] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_379] (rows=25 width=7) + predicate:value is not null + TableScan [TS_251] (rows=25 width=7) + Output:["value"] + <-Map 12 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_384] + PartitionCols:_col0 + Select Operator [SEL_383] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_382] (rows=500 width=10) + predicate:value is not null + TableScan [TS_256] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_233] + Group By Operator [GBY_230] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_225] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_222] + File Output Operator [FS_226] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_222] + Reduce Output Operator [RS_234] + Group By Operator [GBY_231] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_227] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_222] + File Output Operator [FS_228] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_222] + Reduce Output Operator [RS_235] + Group By Operator [GBY_232] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_229] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_222] + <-Reducer 8 [CONTAINS] llap + File Output Operator [FS_239] + table:{"name:":"default.a_n19"} + Select Operator [SEL_237] (rows=1127 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_236] (rows=1127 width=10) + Conds:RS_41._col3=Union 15._col0(Inner),Output:["_col1","_col2"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_207] (rows=550 width=10) + Conds:RS_374._col0=RS_378._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_374] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_372] + <-Map 13 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_378] + PartitionCols:_col0 + Select Operator [SEL_377] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_376] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_24] (rows=500 width=10) + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Union 15 [SIMPLE_EDGE] + <-Map 14 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_387] + PartitionCols:_col0 + Select Operator [SEL_386] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_385] (rows=25 width=7) + predicate:value is not null + TableScan [TS_261] (rows=25 width=7) + Output:["value"] + <-Map 16 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_390] + PartitionCols:_col0 + Select Operator [SEL_389] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_388] (rows=500 width=10) + predicate:value is not null + TableScan [TS_266] (rows=500 width=10) + Output:["value"] + <-Map 17 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_393] + PartitionCols:_col0 + Select Operator [SEL_392] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_391] (rows=500 width=10) + predicate:value is not null + TableScan [TS_271] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_248] + Group By Operator [GBY_245] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_240] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_237] + File Output Operator [FS_241] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_237] + Reduce Output Operator [RS_249] + Group By Operator [GBY_246] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_242] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_237] + File Output Operator [FS_243] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_237] + Reduce Output Operator [RS_250] + Group By Operator [GBY_247] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_244] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_237] Reducer 5 llap File Output Operator [FS_91] - Group By Operator [GBY_89] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] + Select Operator [SEL_90] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_89] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] Reducer 6 llap File Output Operator [FS_100] - Group By Operator [GBY_98] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] + Select Operator [SEL_99] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_98] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] Stage-6 Stats Work{} Stage-1 @@ -3214,281 +3220,287 @@ Stage-5 Stage-3 Reducer 7 llap File Output Operator [FS_130] - Group By Operator [GBY_128] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_125] (rows=544 width=10) - Output:["key","value"] - Group By Operator [GBY_120] (rows=544 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 32 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_426] - PartitionCols:_col0, _col1 - Group By Operator [GBY_425] (rows=1089 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_424] (rows=484 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_423] (rows=484 width=10) - Conds:RS_360._col3=SEL_422._col0(Inner),Output:["_col1","_col2"] - <-Map 12 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_360] - PartitionCols:_col3 - Map Join Operator [MAPJOIN_359] (rows=27 width=7) - Conds:SEL_357._col0=RS_353._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 25 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_353] - PartitionCols:_col0 - Select Operator [SEL_352] (rows=25 width=7) + Select Operator [SEL_129] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_128] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_127] + Group By Operator [GBY_126] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_125] (rows=544 width=10) + Output:["key","value"] + Group By Operator [GBY_120] (rows=544 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 32 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_426] + PartitionCols:_col0, _col1 + Group By Operator [GBY_425] (rows=1089 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_424] (rows=484 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_423] (rows=484 width=10) + Conds:RS_360._col3=SEL_422._col0(Inner),Output:["_col1","_col2"] + <-Map 12 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_360] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_359] (rows=27 width=7) + Conds:SEL_357._col0=RS_353._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 25 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_353] + PartitionCols:_col0 + Select Operator [SEL_352] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_351] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_72] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_357] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_351] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_72] (rows=25 width=7) + Filter Operator [FIL_355] (rows=25 width=7) + predicate:key is not null + TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_357] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_355] (rows=25 width=7) - predicate:key is not null - TableScan [TS_3] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_422] (rows=440 width=10) - Output:["_col0"] - Group By Operator [GBY_421] (rows=440 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 31 [SIMPLE_EDGE] - <-Map 35 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_438] - PartitionCols:_col0, _col1 - Group By Operator [GBY_437] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_436] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_435] (rows=500 width=10) - predicate:value is not null - TableScan [TS_345] (rows=500 width=10) - Output:["key","value"] - <-Reducer 30 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_420] - PartitionCols:_col0, _col1 - Group By Operator [GBY_419] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_418] (rows=381 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_417] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 29 [SIMPLE_EDGE] - <-Map 34 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_434] - PartitionCols:_col0, _col1 - Group By Operator [GBY_433] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_432] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_431] (rows=500 width=10) - predicate:value is not null - TableScan [TS_339] (rows=500 width=10) - Output:["key","value"] - <-Reducer 28 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_416] - PartitionCols:_col0, _col1 - Group By Operator [GBY_415] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_414] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_413] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 27 [SIMPLE_EDGE] - <-Map 26 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_412] - PartitionCols:_col0, _col1 - Group By Operator [GBY_411] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_410] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_409] (rows=25 width=7) - predicate:value is not null - TableScan [TS_309] (rows=25 width=7) - Output:["key","value"] - <-Map 33 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_430] - PartitionCols:_col0, _col1 - Group By Operator [GBY_429] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_428] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_427] (rows=500 width=10) - predicate:value is not null - TableScan [TS_333] (rows=500 width=10) - Output:["key","value"] - <-Reducer 4 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_371] - PartitionCols:_col0, _col1 - Group By Operator [GBY_370] (rows=1089 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_369] (rows=605 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Reducer 11 [CONTAINS] llap - Reduce Output Operator [RS_273] - PartitionCols:_col0, _col1 - Group By Operator [GBY_272] (rows=1210 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_270] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_269] (rows=605 width=10) - Conds:RS_58._col3=RS_384._col0(Inner),Output:["_col1","_col2"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_58] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_252] (rows=550 width=10) - Conds:RS_364._col0=RS_381._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_364] - PartitionCols:_col0 - Select Operator [SEL_362] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_361] (rows=500 width=10) - predicate:key is not null - TableScan [TS_0] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 17 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_381] - PartitionCols:_col0 - Select Operator [SEL_380] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_379] (rows=500 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_29] (rows=500 width=10) - default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 22 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_384] - PartitionCols:_col0 - Select Operator [SEL_383] (rows=381 width=10) - Output:["_col0"] - Group By Operator [GBY_382] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 21 [SIMPLE_EDGE] - <-Map 24 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_408] - PartitionCols:_col0, _col1 - Group By Operator [GBY_407] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_406] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_405] (rows=500 width=10) - predicate:value is not null - TableScan [TS_303] (rows=500 width=10) - Output:["key","value"] - <-Reducer 20 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_400] - PartitionCols:_col0, _col1 - Group By Operator [GBY_399] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_398] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_397] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 19 [SIMPLE_EDGE] - <-Map 18 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_396] - PartitionCols:_col0, _col1 - Group By Operator [GBY_395] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_394] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_393] (rows=25 width=7) - predicate:value is not null - TableScan [TS_286] (rows=25 width=7) - Output:["key","value"] - <-Map 23 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_404] - PartitionCols:_col0, _col1 - Group By Operator [GBY_403] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_402] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_401] (rows=500 width=10) - predicate:value is not null - TableScan [TS_297] (rows=500 width=10) - Output:["key","value"] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_264] - PartitionCols:_col0, _col1 - Group By Operator [GBY_263] (rows=1210 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_261] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_260] (rows=605 width=10) - Conds:RS_365._col3=RS_368._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_365] - PartitionCols:_col3 - Map Join Operator [MAPJOIN_363] (rows=550 width=10) - Conds:SEL_362._col0=RS_358._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 12 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_358] - PartitionCols:_col0 - Select Operator [SEL_356] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_354] (rows=25 width=7) - predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_3] - Please refer to the previous Select Operator [SEL_362] - <-Reducer 15 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_368] - PartitionCols:_col0 - Select Operator [SEL_367] (rows=262 width=10) - Output:["_col0"] - Group By Operator [GBY_366] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_388] - PartitionCols:_col0, _col1 - Group By Operator [GBY_387] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_386] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_385] (rows=25 width=7) - predicate:value is not null - TableScan [TS_274] (rows=25 width=7) - Output:["key","value"] - <-Map 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_392] - PartitionCols:_col0, _col1 - Group By Operator [GBY_391] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_390] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_389] (rows=500 width=10) - predicate:value is not null - TableScan [TS_280] (rows=500 width=10) - Output:["key","value"] + <-Select Operator [SEL_422] (rows=440 width=10) + Output:["_col0"] + Group By Operator [GBY_421] (rows=440 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 31 [SIMPLE_EDGE] + <-Map 35 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_438] + PartitionCols:_col0, _col1 + Group By Operator [GBY_437] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_436] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_435] (rows=500 width=10) + predicate:value is not null + TableScan [TS_345] (rows=500 width=10) + Output:["key","value"] + <-Reducer 30 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_420] + PartitionCols:_col0, _col1 + Group By Operator [GBY_419] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_418] (rows=381 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_417] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 29 [SIMPLE_EDGE] + <-Map 34 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_434] + PartitionCols:_col0, _col1 + Group By Operator [GBY_433] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_432] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_431] (rows=500 width=10) + predicate:value is not null + TableScan [TS_339] (rows=500 width=10) + Output:["key","value"] + <-Reducer 28 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_416] + PartitionCols:_col0, _col1 + Group By Operator [GBY_415] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_414] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_413] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 27 [SIMPLE_EDGE] + <-Map 26 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_412] + PartitionCols:_col0, _col1 + Group By Operator [GBY_411] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_410] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_409] (rows=25 width=7) + predicate:value is not null + TableScan [TS_309] (rows=25 width=7) + Output:["key","value"] + <-Map 33 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_430] + PartitionCols:_col0, _col1 + Group By Operator [GBY_429] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_428] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_427] (rows=500 width=10) + predicate:value is not null + TableScan [TS_333] (rows=500 width=10) + Output:["key","value"] + <-Reducer 4 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_371] + PartitionCols:_col0, _col1 + Group By Operator [GBY_370] (rows=1089 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_369] (rows=605 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] llap + Reduce Output Operator [RS_273] + PartitionCols:_col0, _col1 + Group By Operator [GBY_272] (rows=1210 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_270] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_269] (rows=605 width=10) + Conds:RS_58._col3=RS_384._col0(Inner),Output:["_col1","_col2"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_58] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_252] (rows=550 width=10) + Conds:RS_364._col0=RS_381._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_364] + PartitionCols:_col0 + Select Operator [SEL_362] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_361] (rows=500 width=10) + predicate:key is not null + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 17 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_381] + PartitionCols:_col0 + Select Operator [SEL_380] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_379] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_29] (rows=500 width=10) + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 22 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_384] + PartitionCols:_col0 + Select Operator [SEL_383] (rows=381 width=10) + Output:["_col0"] + Group By Operator [GBY_382] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 21 [SIMPLE_EDGE] + <-Map 24 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_408] + PartitionCols:_col0, _col1 + Group By Operator [GBY_407] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_406] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_405] (rows=500 width=10) + predicate:value is not null + TableScan [TS_303] (rows=500 width=10) + Output:["key","value"] + <-Reducer 20 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_400] + PartitionCols:_col0, _col1 + Group By Operator [GBY_399] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_398] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_397] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 19 [SIMPLE_EDGE] + <-Map 18 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_396] + PartitionCols:_col0, _col1 + Group By Operator [GBY_395] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_394] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_393] (rows=25 width=7) + predicate:value is not null + TableScan [TS_286] (rows=25 width=7) + Output:["key","value"] + <-Map 23 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_404] + PartitionCols:_col0, _col1 + Group By Operator [GBY_403] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_402] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_401] (rows=500 width=10) + predicate:value is not null + TableScan [TS_297] (rows=500 width=10) + Output:["key","value"] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_264] + PartitionCols:_col0, _col1 + Group By Operator [GBY_263] (rows=1210 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_261] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_260] (rows=605 width=10) + Conds:RS_365._col3=RS_368._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_365] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_363] (rows=550 width=10) + Conds:SEL_362._col0=RS_358._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_358] + PartitionCols:_col0 + Select Operator [SEL_356] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_354] (rows=25 width=7) + predicate:(key is not null and value is not null) + Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_362] + <-Reducer 15 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_368] + PartitionCols:_col0 + Select Operator [SEL_367] (rows=262 width=10) + Output:["_col0"] + Group By Operator [GBY_366] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_388] + PartitionCols:_col0, _col1 + Group By Operator [GBY_387] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_386] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_385] (rows=25 width=7) + predicate:value is not null + TableScan [TS_274] (rows=25 width=7) + Output:["key","value"] + <-Map 16 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_392] + PartitionCols:_col0, _col1 + Group By Operator [GBY_391] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_390] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_389] (rows=500 width=10) + predicate:value is not null + TableScan [TS_280] (rows=500 width=10) + Output:["key","value"] Reducer 8 llap File Output Operator [FS_139] - Group By Operator [GBY_137] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_136] - Group By Operator [GBY_135] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_134] (rows=544 width=10) - Output:["key","value"] - Please refer to the previous Group By Operator [GBY_120] + Select Operator [SEL_138] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_137] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_136] + Group By Operator [GBY_135] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_134] (rows=544 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] Reducer 9 llap File Output Operator [FS_148] - Group By Operator [GBY_146] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_145] - Group By Operator [GBY_144] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_143] (rows=544 width=10) - Output:["key","value"] - Please refer to the previous Group By Operator [GBY_120] + Select Operator [SEL_147] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_146] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_145] + Group By Operator [GBY_144] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_143] (rows=544 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] Stage-6 Stats Work{} Stage-1 @@ -3558,69 +3570,73 @@ Stage-4 Stage-2 Reducer 6 llap File Output Operator [FS_28] - Group By Operator [GBY_26] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_21] - table:{"name:":"default.dest1_n172"} - Select Operator [SEL_19] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_18] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Group By Operator [GBY_13] (rows=1 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_64] - PartitionCols:_col0, _col1 - Group By Operator [GBY_63] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_62] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_48] (rows=500 width=10) - Output:["key","value"] - <-Reducer 2 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_59] - PartitionCols:_col0, _col1 - Group By Operator [GBY_58] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_57] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_56] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_55] - Group By Operator [GBY_54] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_53] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - PARTITION_ONLY_SHUFFLE [RS_25] - Group By Operator [GBY_24] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_23] (rows=1 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_19] + Select Operator [SEL_27] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_26] (rows=1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_21] + table:{"name:":"default.dest1_n172"} + Select Operator [SEL_19] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_18] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Group By Operator [GBY_13] (rows=1 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_64] + PartitionCols:_col0, _col1 + Group By Operator [GBY_63] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_62] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_48] (rows=500 width=10) + Output:["key","value"] + <-Reducer 2 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_59] + PartitionCols:_col0, _col1 + Group By Operator [GBY_58] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_57] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_56] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap + PARTITION_ONLY_SHUFFLE [RS_55] + Group By Operator [GBY_54] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_53] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + PARTITION_ONLY_SHUFFLE [RS_25] + Group By Operator [GBY_24] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_23] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_19] Reducer 7 llap File Output Operator [FS_42] - Group By Operator [GBY_40] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_39] - Group By Operator [GBY_38] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] - Select Operator [SEL_37] (rows=1 width=456) - Output:["key","val1","val2"] - Select Operator [SEL_33] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_32] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_13] + Select Operator [SEL_41] (rows=1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_40] (rows=1 width=492) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)","max(VALUE._col8)","avg(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col11)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_39] + Group By Operator [GBY_38] (rows=1 width=696) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(val1))","avg(COALESCE(length(val1),0))","count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END)","compute_bit_vector(val1, 'hll')","max(length(val2))","avg(COALESCE(length(val2),0))","count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END)","compute_bit_vector(val2, 'hll')"] + Select Operator [SEL_37] (rows=1 width=456) + Output:["key","val1","val2"] + Select Operator [SEL_33] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_32] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_13] Stage-5 Stats Work{} Stage-1 @@ -3780,89 +3796,93 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_25] - Group By Operator [GBY_23] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_18] - table:{"name:":"default.dest1_n172"} - Select Operator [SEL_16] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_73] - PartitionCols:_col0 - Group By Operator [GBY_71] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_70] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_47] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_74] - PartitionCols:_col0, _col1 - Group By Operator [GBY_72] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_70] - <-Map 9 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_78] - PartitionCols:_col0 - Group By Operator [GBY_76] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_75] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_54] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_79] - PartitionCols:_col0, _col1 - Group By Operator [GBY_77] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_75] - <-Reducer 2 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_68] - PartitionCols:_col0 - Group By Operator [GBY_66] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_65] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_64] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_63] - Group By Operator [GBY_62] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_61] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_69] - PartitionCols:_col0, _col1 - Group By Operator [GBY_67] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_65] - PARTITION_ONLY_SHUFFLE [RS_22] - Group By Operator [GBY_21] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_20] (rows=1 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_16] + Select Operator [SEL_24] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_23] (rows=1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_18] + table:{"name:":"default.dest1_n172"} + Select Operator [SEL_16] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_15] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_73] + PartitionCols:_col0 + Group By Operator [GBY_71] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_70] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_47] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_74] + PartitionCols:_col0, _col1 + Group By Operator [GBY_72] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_70] + <-Map 9 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_78] + PartitionCols:_col0 + Group By Operator [GBY_76] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_75] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_54] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_79] + PartitionCols:_col0, _col1 + Group By Operator [GBY_77] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_75] + <-Reducer 2 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_66] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_65] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_64] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap + PARTITION_ONLY_SHUFFLE [RS_63] + Group By Operator [GBY_62] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_61] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_69] + PartitionCols:_col0, _col1 + Group By Operator [GBY_67] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_65] + PARTITION_ONLY_SHUFFLE [RS_22] + Group By Operator [GBY_21] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_20] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] Reducer 7 llap File Output Operator [FS_39] - Group By Operator [GBY_37] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_32] - table:{"name:":"default.dest2_n43"} - Select Operator [SEL_30] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_29] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_36] - Group By Operator [GBY_35] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] - Select Operator [SEL_34] (rows=1 width=456) - Output:["key","val1","val2"] - Please refer to the previous Select Operator [SEL_30] + Select Operator [SEL_38] (rows=1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_37] (rows=1 width=492) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)","max(VALUE._col8)","avg(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col11)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_32] + table:{"name:":"default.dest2_n43"} + Select Operator [SEL_30] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_29] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_36] + Group By Operator [GBY_35] (rows=1 width=696) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(val1))","avg(COALESCE(length(val1),0))","count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END)","compute_bit_vector(val1, 'hll')","max(length(val2))","avg(COALESCE(length(val2),0))","count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END)","compute_bit_vector(val2, 'hll')"] + Select Operator [SEL_34] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_30] Stage-5 Stats Work{} Stage-1 @@ -3916,75 +3936,79 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_23] - Group By Operator [GBY_21] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_16] - table:{"name:":"default.dest1_n172"} - Select Operator [SEL_14] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_13] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_64] - PartitionCols:_col0 - Group By Operator [GBY_62] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_61] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_45] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_65] - PartitionCols:_col0, _col1 - Group By Operator [GBY_63] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_61] - <-Reducer 2 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_59] - PartitionCols:_col0 - Group By Operator [GBY_57] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_56] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_55] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_54] - Group By Operator [GBY_53] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_52] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_60] - PartitionCols:_col0, _col1 - Group By Operator [GBY_58] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_56] - PARTITION_ONLY_SHUFFLE [RS_20] - Group By Operator [GBY_19] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_18] (rows=1 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_14] + Select Operator [SEL_22] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_21] (rows=1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_16] + table:{"name:":"default.dest1_n172"} + Select Operator [SEL_14] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_13] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_64] + PartitionCols:_col0 + Group By Operator [GBY_62] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_61] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_45] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_65] + PartitionCols:_col0, _col1 + Group By Operator [GBY_63] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_61] + <-Reducer 2 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_59] + PartitionCols:_col0 + Group By Operator [GBY_57] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_56] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_55] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap + PARTITION_ONLY_SHUFFLE [RS_54] + Group By Operator [GBY_53] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_52] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_60] + PartitionCols:_col0, _col1 + Group By Operator [GBY_58] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_56] + PARTITION_ONLY_SHUFFLE [RS_20] + Group By Operator [GBY_19] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_18] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_14] Reducer 7 llap File Output Operator [FS_37] - Group By Operator [GBY_35] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_30] - table:{"name:":"default.dest2_n43"} - Select Operator [SEL_28] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_27] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_34] - Group By Operator [GBY_33] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] - Select Operator [SEL_32] (rows=1 width=456) - Output:["key","val1","val2"] - Please refer to the previous Select Operator [SEL_28] + Select Operator [SEL_36] (rows=1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_35] (rows=1 width=492) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)","max(VALUE._col8)","avg(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col11)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_30] + table:{"name:":"default.dest2_n43"} + Select Operator [SEL_28] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_27] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_34] + Group By Operator [GBY_33] (rows=1 width=696) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(val1))","avg(COALESCE(length(val1),0))","count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END)","compute_bit_vector(val1, 'hll')","max(length(val2))","avg(COALESCE(length(val2),0))","count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END)","compute_bit_vector(val2, 'hll')"] + Select Operator [SEL_32] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_28] Stage-5 Stats Work{} Stage-1 diff --git a/ql/src/test/results/clientpositive/llap/fm-sketch.q.out b/ql/src/test/results/clientpositive/llap/fm-sketch.q.out index f77a6db39a..ae1059101f 100644 --- a/ql/src/test/results/clientpositive/llap/fm-sketch.q.out +++ b/ql/src/test/results/clientpositive/llap/fm-sketch.q.out @@ -47,33 +47,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'fm', 16) + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'fm', 16) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -161,33 +165,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'fm', 16) + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'fm', 16) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/groupby10.q.out b/ql/src/test/results/clientpositive/llap/groupby10.q.out index d92bcb3d80..3d509e387b 100644 --- a/ql/src/test/results/clientpositive/llap/groupby10.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby10.q.out @@ -142,35 +142,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), val1 (type: int), CASE WHEN (val1 is null) THEN (1) ELSE (null) END (type: int), val2 (type: int), CASE WHEN (val2 is null) THEN (1) ELSE (null) END (type: int) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col4), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -217,35 +221,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), val1 (type: int), CASE WHEN (val1 is null) THEN (1) ELSE (null) END (type: int), val2 (type: int), CASE WHEN (val2 is null) THEN (1) ELSE (null) END (type: int) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col4), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -474,35 +482,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), val1 (type: int), CASE WHEN (val1 is null) THEN (1) ELSE (null) END (type: int), val2 (type: int), CASE WHEN (val2 is null) THEN (1) ELSE (null) END (type: int) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col4), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -549,35 +561,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), val1 (type: int), CASE WHEN (val1 is null) THEN (1) ELSE (null) END (type: int), val2 (type: int), CASE WHEN (val2 is null) THEN (1) ELSE (null) END (type: int) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col4), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -776,7 +792,7 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), val1 (type: int), CASE WHEN (val1 is null) THEN (1) ELSE (null) END (type: int), val2 (type: int), CASE WHEN (val2 is null) THEN (1) ELSE (null) END (type: int) Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: int) @@ -803,37 +819,45 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), val1 (type: int), CASE WHEN (val1 is null) THEN (1) ELSE (null) END (type: int), val2 (type: int), CASE WHEN (val2 is null) THEN (1) ELSE (null) END (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col4), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col4), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby11.q.out b/ql/src/test/results/clientpositive/llap/groupby11.q.out index 01c23f715e..79a9d056b4 100644 --- a/ql/src/test/results/clientpositive/llap/groupby11.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby11.q.out @@ -131,39 +131,39 @@ STAGE PLANS: sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 307 Data size: 57102 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), val1 (type: int), val2 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), key (type: string), val1 (type: int), CASE WHEN (val1 is null) THEN (1) ELSE (null) END (type: int), val2 (type: int), CASE WHEN (val2 is null) THEN (1) ELSE (null) END (type: int) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll'), min(VALUE._col7), max(VALUE._col7), count(VALUE._col8), compute_bit_vector(VALUE._col7, 'hll') keys: '111' (type: string) mode: partial1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 307 Data size: 196173 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '111' (type: string) null sort order: z sort order: + Map-reduce partition columns: '111' (type: string) - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 307 Data size: 196173 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: '111' (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 571 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -215,39 +215,39 @@ STAGE PLANS: sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), val1 (type: int), val2 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), key (type: string), val1 (type: int), CASE WHEN (val1 is null) THEN (1) ELSE (null) END (type: int), val2 (type: int), CASE WHEN (val2 is null) THEN (1) ELSE (null) END (type: int) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll'), min(VALUE._col7), max(VALUE._col7), count(VALUE._col8), compute_bit_vector(VALUE._col7, 'hll') keys: '111' (type: string) mode: partial1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 307 Data size: 196173 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '111' (type: string) null sort order: z sort order: + Map-reduce partition columns: '111' (type: string) - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 307 Data size: 196173 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: '111' (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 571 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/groupby12.q.out b/ql/src/test/results/clientpositive/llap/groupby12.q.out index add92ca3af..8c1c865052 100644 --- a/ql/src/test/results/clientpositive/llap/groupby12.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby12.q.out @@ -79,22 +79,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby1_map.q.out b/ql/src/test/results/clientpositive/llap/groupby1_map.q.out index 77d547ff41..14e118f0c4 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1_map.q.out @@ -82,31 +82,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out index 3a42667d2d..c72e590015 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out @@ -82,31 +82,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out index 367d27e32b..8b66f549a0 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out @@ -99,31 +99,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out index da2ba5a2b6..8bad7bdb6e 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out @@ -78,22 +78,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: double) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), value (type: double), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col4), compute_bit_vector(VALUE._col3, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2.q.out b/ql/src/test/results/clientpositive/llap/groupby2.q.out index bdad787edb..572da8fc57 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2.q.out @@ -98,35 +98,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), c1 (type: int), c2 (type: string) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), key (type: string), c1 (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), CASE WHEN (c2 is null) THEN (1) ELSE (null) END (type: int), c2 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col7), avg(VALUE._col8), count(VALUE._col9), compute_bit_vector(VALUE._col10, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_map.q.out b/ql/src/test/results/clientpositive/llap/groupby2_map.q.out index 67684ced33..c4d3b2176c 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_map.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, c1, c2 Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out index e9f8ec6224..bf217cac03 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -242,31 +246,35 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out index 9b87d90a7a..b58f074c0c 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out @@ -101,31 +101,35 @@ STAGE PLANS: outputColumnNames: key, c1, c2 Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out index 2e8481d485..29e0fe8651 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out @@ -79,22 +79,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), c1 (type: int), c2 (type: string) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), key (type: string), c1 (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), CASE WHEN (c2 is null) THEN (1) ELSE (null) END (type: int), c2 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col7), avg(VALUE._col8), count(VALUE._col9), compute_bit_vector(VALUE._col10, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out index c4fce00adf..4ae30d5d09 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out @@ -80,22 +80,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), key (type: string), c1 (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), CASE WHEN (c2 is null) THEN (1) ELSE (null) END (type: int), c2 (type: string), c3 (type: int), CASE WHEN (c3 is null) THEN (1) ELSE (null) END (type: int), c4 (type: int), CASE WHEN (c4 is null) THEN (1) ELSE (null) END (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col7), avg(VALUE._col8), count(VALUE._col9), compute_bit_vector(VALUE._col10, 'hll'), min(VALUE._col11), max(VALUE._col11), count(VALUE._col12), compute_bit_vector(VALUE._col11, 'hll'), min(VALUE._col13), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col13, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3.q.out b/ql/src/test/results/clientpositive/llap/groupby3.q.out index 7e05e27229..1e1b31bc27 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3.q.out @@ -108,30 +108,34 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(CASE WHEN (c7 is null) THEN (1) ELSE (null) END), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(CASE WHEN (c8 is null) THEN (1) ELSE (null) END), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(CASE WHEN (c9 is null) THEN (1) ELSE (null) END), compute_bit_vector(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: binary), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), _col19 (type: binary), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), _col23 (type: binary), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), _col27 (type: binary), _col28 (type: double), _col29 (type: double), _col30 (type: bigint), _col31 (type: binary), _col32 (type: double), _col33 (type: double), _col34 (type: bigint), _col35 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), min(VALUE._col20), max(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23), min(VALUE._col24), max(VALUE._col25), count(VALUE._col26), compute_bit_vector(VALUE._col27), min(VALUE._col28), max(VALUE._col29), count(VALUE._col30), compute_bit_vector(VALUE._col31), min(VALUE._col32), max(VALUE._col33), count(VALUE._col34), compute_bit_vector(VALUE._col35) mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'DOUBLE' (type: string), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'DOUBLE' (type: string), _col28 (type: double), _col29 (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'DOUBLE' (type: string), _col32 (type: double), _col33 (type: double), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_map.q.out b/ql/src/test/results/clientpositive/llap/groupby3_map.q.out index 93972862f3..c079213cbe 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_map.q.out @@ -99,17 +99,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(CASE WHEN (c7 is null) THEN (1) ELSE (null) END), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(CASE WHEN (c8 is null) THEN (1) ELSE (null) END), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(CASE WHEN (c9 is null) THEN (1) ELSE (null) END), compute_bit_vector(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'DOUBLE' (type: string), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'DOUBLE' (type: string), _col28 (type: double), _col29 (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'DOUBLE' (type: string), _col32 (type: double), _col33 (type: double), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out index 25526614bb..85666e663b 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out @@ -103,17 +103,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(CASE WHEN (c7 is null) THEN (1) ELSE (null) END), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(CASE WHEN (c8 is null) THEN (1) ELSE (null) END), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(CASE WHEN (c9 is null) THEN (1) ELSE (null) END), compute_bit_vector(c9, 'hll'), min(c10), max(c10), count(CASE WHEN (c10 is null) THEN (1) ELSE (null) END), compute_bit_vector(c10, 'hll'), min(c11), max(c11), count(CASE WHEN (c11 is null) THEN (1) ELSE (null) END), compute_bit_vector(c11, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43 + Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'DOUBLE' (type: string), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'DOUBLE' (type: string), _col28 (type: double), _col29 (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'DOUBLE' (type: string), _col32 (type: double), _col33 (type: double), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary), 'DOUBLE' (type: string), _col36 (type: double), _col37 (type: double), _col38 (type: bigint), COALESCE(ndv_compute_bit_vector(_col39),0) (type: bigint), _col39 (type: binary), 'DOUBLE' (type: string), _col40 (type: double), _col41 (type: double), _col42 (type: bigint), COALESCE(ndv_compute_bit_vector(_col43),0) (type: bigint), _col43 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65 + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out index 761cea91a6..3bf670090c 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out @@ -114,17 +114,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(CASE WHEN (c7 is null) THEN (1) ELSE (null) END), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(CASE WHEN (c8 is null) THEN (1) ELSE (null) END), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(CASE WHEN (c9 is null) THEN (1) ELSE (null) END), compute_bit_vector(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'DOUBLE' (type: string), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'DOUBLE' (type: string), _col28 (type: double), _col29 (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'DOUBLE' (type: string), _col32 (type: double), _col33 (type: double), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out index 0f3e3c1fc1..677be4e232 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out @@ -92,17 +92,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(CASE WHEN (c7 is null) THEN (1) ELSE (null) END), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(CASE WHEN (c8 is null) THEN (1) ELSE (null) END), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(CASE WHEN (c9 is null) THEN (1) ELSE (null) END), compute_bit_vector(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'DOUBLE' (type: string), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'DOUBLE' (type: string), _col28 (type: double), _col29 (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'DOUBLE' (type: string), _col32 (type: double), _col33 (type: double), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out index 097dfb9c25..0083fa716b 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out @@ -96,17 +96,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(CASE WHEN (c7 is null) THEN (1) ELSE (null) END), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(CASE WHEN (c8 is null) THEN (1) ELSE (null) END), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(CASE WHEN (c9 is null) THEN (1) ELSE (null) END), compute_bit_vector(c9, 'hll'), min(c10), max(c10), count(CASE WHEN (c10 is null) THEN (1) ELSE (null) END), compute_bit_vector(c10, 'hll'), min(c11), max(c11), count(CASE WHEN (c11 is null) THEN (1) ELSE (null) END), compute_bit_vector(c11, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43 + Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'DOUBLE' (type: string), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'DOUBLE' (type: string), _col28 (type: double), _col29 (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'DOUBLE' (type: string), _col32 (type: double), _col33 (type: double), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary), 'DOUBLE' (type: string), _col36 (type: double), _col37 (type: double), _col38 (type: bigint), COALESCE(ndv_compute_bit_vector(_col39),0) (type: bigint), _col39 (type: binary), 'DOUBLE' (type: string), _col40 (type: double), _col41 (type: double), _col42 (type: bigint), COALESCE(ndv_compute_bit_vector(_col43),0) (type: bigint), _col43 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65 + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby4.q.out b/ql/src/test/results/clientpositive/llap/groupby4.q.out index c0492e4c33..deba74d361 100644 --- a/ql/src/test/results/clientpositive/llap/groupby4.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby4.q.out @@ -91,35 +91,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), c1 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll') mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby4_map.q.out b/ql/src/test/results/clientpositive/llap/groupby4_map.q.out index dc5c5eb7e2..94adea3d57 100644 --- a/ql/src/test/results/clientpositive/llap/groupby4_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby4_map.q.out @@ -75,17 +75,21 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out index 14cd4909e7..c0f17fcff1 100644 --- a/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out @@ -75,17 +75,21 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out index 14b97c4733..ce3f322d19 100644 --- a/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out @@ -74,22 +74,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), c1 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby5.q.out b/ql/src/test/results/clientpositive/llap/groupby5.q.out index 2405264a37..6cda41847d 100644 --- a/ql/src/test/results/clientpositive/llap/groupby5.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby5.q.out @@ -103,35 +103,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby5_map.q.out b/ql/src/test/results/clientpositive/llap/groupby5_map.q.out index aa6b0908cd..49da11f790 100644 --- a/ql/src/test/results/clientpositive/llap/groupby5_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby5_map.q.out @@ -77,17 +77,21 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out index c21e2de3df..d2362cae42 100644 --- a/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out @@ -77,17 +77,21 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out index fe2dbffa5a..058b19308e 100644 --- a/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out @@ -84,22 +84,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby6.q.out b/ql/src/test/results/clientpositive/llap/groupby6.q.out index 0e45e045b3..ef6b617f2a 100644 --- a/ql/src/test/results/clientpositive/llap/groupby6.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby6.q.out @@ -91,35 +91,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), c1 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll') mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby6_map.q.out b/ql/src/test/results/clientpositive/llap/groupby6_map.q.out index 13e38e6304..12c8dd0ae3 100644 --- a/ql/src/test/results/clientpositive/llap/groupby6_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby6_map.q.out @@ -77,31 +77,35 @@ STAGE PLANS: outputColumnNames: c1 Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out index c07bc8ae26..248d60ab6d 100644 --- a/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out @@ -92,31 +92,35 @@ STAGE PLANS: outputColumnNames: c1 Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out index 18e5c756db..0290fd83e4 100644 --- a/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out @@ -74,22 +74,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), c1 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_map.q.out b/ql/src/test/results/clientpositive/llap/groupby7_map.q.out index a37e6cf957..ecb3e8c3e1 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_map.q.out @@ -118,31 +118,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -169,31 +173,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out index f2cd96aa1a..05281b570a 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out @@ -94,16 +94,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -127,46 +127,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out index 781547ed0f..60e2dde241 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out @@ -136,31 +136,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -203,31 +207,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out index 8346f49a2d..41f46b81b3 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out @@ -107,22 +107,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -152,22 +156,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out index 5dc188d761..b0ca5d1a03 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out @@ -135,22 +135,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -181,22 +185,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby8.q.out b/ql/src/test/results/clientpositive/llap/groupby8.q.out index a008928c9c..0d4a40b507 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby8.q.out @@ -126,35 +126,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -201,35 +205,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1034,35 +1042,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1109,35 +1121,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby8_map.q.out b/ql/src/test/results/clientpositive/llap/groupby8_map.q.out index 7a62ab1963..30787514ea 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby8_map.q.out @@ -93,16 +93,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -126,46 +126,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out index 8668593a92..42d81fa702 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out @@ -134,31 +134,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -201,31 +205,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out index d3a55423c5..c9adadb12c 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out @@ -96,7 +96,7 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -123,37 +123,45 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), CASE WHEN (value is null) THEN (1) ELSE (null) END (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(VALUE._col1), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col6, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby9.q.out b/ql/src/test/results/clientpositive/llap/groupby9.q.out index 0ef88e06ef..0d3f164aa8 100644 --- a/ql/src/test/results/clientpositive/llap/groupby9.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby9.q.out @@ -116,31 +116,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -167,31 +171,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -987,31 +995,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1038,31 +1050,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1858,31 +1874,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1909,31 +1929,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2731,31 +2755,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2782,31 +2810,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -3602,31 +3634,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -3653,31 +3689,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/groupby_cube1.q.out index f166ee6784..baa42d824f 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_cube1.q.out @@ -740,31 +740,35 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(val), max(val), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -808,31 +812,35 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(val), max(val), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_cube_multi_gby.q.out b/ql/src/test/results/clientpositive/llap/groupby_cube_multi_gby.q.out index e742242791..193cc03daf 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_cube_multi_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_cube_multi_gby.q.out @@ -119,31 +119,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 271000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -166,31 +170,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_duplicate_key.q.out b/ql/src/test/results/clientpositive/llap/groupby_duplicate_key.q.out index eba89d8da5..cdfee02301 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_duplicate_key.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_duplicate_key.q.out @@ -192,17 +192,21 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 250 Data size: 64250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/llap/groupby_map_ppr.q.out index 1f8d703728..69facf8114 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_map_ppr.q.out @@ -203,53 +203,57 @@ STAGE PLANS: outputColumnNames: key, c1, c2 Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby_map_ppr_multi_distinct.q.out index 8f93eb0dcf..9fdc5c3dd1 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_map_ppr_multi_distinct.q.out @@ -203,53 +203,57 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_multi_insert_common_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby_multi_insert_common_distinct.q.out index aec09dec3c..40576c3e82 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_multi_insert_common_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_multi_insert_common_distinct.q.out @@ -116,31 +116,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -167,31 +171,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer2.q.out index e52dafd8b8..ad164d35d3 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer2.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer2.q.out @@ -97,16 +97,16 @@ STAGE PLANS: outputColumnNames: key, c1 Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE @@ -133,46 +133,54 @@ STAGE PLANS: outputColumnNames: key, c1, c2 Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer3.q.out index 4d4e87c3ad..189ccf5786 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer3.q.out @@ -109,16 +109,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -145,46 +145,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -369,16 +377,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -405,46 +413,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -628,16 +644,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -664,46 +680,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -887,16 +911,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -923,46 +947,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_nocolumnalign.q.out b/ql/src/test/results/clientpositive/llap/groupby_nocolumnalign.q.out index 77c888fd92..f80f935d01 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_nocolumnalign.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_nocolumnalign.q.out @@ -87,31 +87,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_position.q.out b/ql/src/test/results/clientpositive/llap/groupby_position.q.out index 1d6ed69bed..e92e2bcc90 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_position.q.out @@ -114,31 +114,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 83 Data size: 15604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -165,31 +169,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -391,31 +399,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 83 Data size: 15604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -442,31 +454,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_ppr.q.out b/ql/src/test/results/clientpositive/llap/groupby_ppr.q.out index 9b857193a2..e03c2e58c3 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_ppr.q.out @@ -201,41 +201,45 @@ STAGE PLANS: sort order: Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: key (type: string), c1 (type: int), c2 (type: string) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), key (type: string), c1 (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), CASE WHEN (c2 is null) THEN (1) ELSE (null) END (type: int), c2 (type: string) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col7), avg(VALUE._col8), count(VALUE._col9), compute_bit_vector(VALUE._col10, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out index 0f12ff8348..0944349866 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out @@ -201,41 +201,45 @@ STAGE PLANS: sort order: Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), key (type: string), c1 (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), CASE WHEN (c2 is null) THEN (1) ELSE (null) END (type: int), c2 (type: string), c3 (type: int), CASE WHEN (c3 is null) THEN (1) ELSE (null) END (type: int), c4 (type: int), CASE WHEN (c4 is null) THEN (1) ELSE (null) END (type: int) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col7), avg(VALUE._col8), count(VALUE._col9), compute_bit_vector(VALUE._col10, 'hll'), min(VALUE._col11), max(VALUE._col11), count(VALUE._col12), compute_bit_vector(VALUE._col11, 'hll'), min(VALUE._col13), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col13, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -505,36 +509,36 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), CASE WHEN (key is null) THEN (1) ELSE (null) END (type: int), key (type: string), c1 (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), CASE WHEN (c2 is null) THEN (1) ELSE (null) END (type: int), c2 (type: string), c3 (type: int), CASE WHEN (c3 is null) THEN (1) ELSE (null) END (type: int), c4 (type: int), CASE WHEN (c4 is null) THEN (1) ELSE (null) END (type: int) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col7), avg(VALUE._col8), count(VALUE._col9), compute_bit_vector(VALUE._col10, 'hll'), min(VALUE._col11), max(VALUE._col11), count(VALUE._col12), compute_bit_vector(VALUE._col11, 'hll'), min(VALUE._col13), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col13, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/llap/groupby_rollup1.q.out index 675f58eeeb..e81f2e1b6b 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_rollup1.q.out @@ -553,31 +553,35 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(val), max(val), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -621,31 +625,35 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(val), max(val), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_1_23.q.out index 4228454958..3d77f4ccef 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_1_23.q.out @@ -114,19 +114,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -177,34 +177,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -420,53 +424,57 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -609,19 +617,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -672,34 +680,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -840,19 +852,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -903,34 +915,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1079,19 +1095,19 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1142,34 +1158,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1386,53 +1406,57 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2128 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2128 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1643,53 +1667,57 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1912,53 +1940,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2117,19 +2149,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2226,19 +2258,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2289,34 +2321,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Union 2 Vertex: Union 2 @@ -2486,19 +2522,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2618,37 +2654,41 @@ STAGE PLANS: /t1_n80 [t1_n80] Reducer 3 Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 5 Execution mode: llap Needs Tagging: false @@ -2698,19 +2738,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Union 2 Vertex: Union 2 @@ -2949,19 +2989,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 607 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap Path -> Alias: @@ -3010,34 +3050,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3523,53 +3567,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3710,19 +3758,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -3773,34 +3821,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3952,19 +4004,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, key4, cnt Statistics: Num rows: 6 Data size: 612 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(key4, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(key4), max(key4), count(CASE WHEN (key4 is null) THEN (1) ELSE (null) END), compute_bit_vector(key4, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4015,34 +4067,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4193,19 +4249,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4256,34 +4312,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4441,19 +4501,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4504,34 +4564,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4706,16 +4770,16 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -4744,46 +4808,54 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -4959,16 +5031,16 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -4997,46 +5069,54 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_2.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_2.q.out index 7b01cc8e1d..4c5846825e 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_2.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_2.q.out @@ -113,31 +113,35 @@ STAGE PLANS: outputColumnNames: val, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_3.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_3.q.out index 0c34c3dc5b..abbb77a712 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_3.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_3.q.out @@ -92,33 +92,37 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -234,33 +238,37 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_4.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_4.q.out index 656b2547ad..b0f0aed1b8 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_4.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_4.q.out @@ -113,31 +113,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -271,31 +275,35 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_5.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_5.q.out index cbb258d66e..cd43c8c50e 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_5.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_5.q.out @@ -92,33 +92,37 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -262,33 +266,37 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -461,31 +469,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_6.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_6.q.out index 08d94dc5c5..27999b098a 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_6.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_6.q.out @@ -125,53 +125,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -347,53 +351,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -599,53 +607,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_7.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_7.q.out index 5e785ffeb5..2cc73e78bb 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_7.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_7.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out index 0e528ffa37..a661efde2e 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out @@ -114,19 +114,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -177,34 +177,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -442,53 +446,57 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -631,19 +639,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -694,34 +702,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -862,19 +874,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -925,34 +937,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1101,19 +1117,19 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1164,34 +1180,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1430,53 +1450,57 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2128 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2128 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1080 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1709,53 +1733,57 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2000,53 +2028,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2205,19 +2237,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2314,19 +2346,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2377,34 +2409,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Union 2 Vertex: Union 2 @@ -2575,19 +2611,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2707,37 +2743,41 @@ STAGE PLANS: /t1_n56 [t1_n56] Reducer 3 Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 5 Execution mode: vectorized, llap Needs Tagging: false @@ -2808,19 +2848,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Union 2 Vertex: Union 2 @@ -3059,19 +3099,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 607 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap Path -> Alias: @@ -3120,34 +3160,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3677,53 +3721,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3864,19 +3912,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -3927,34 +3975,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4106,19 +4158,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, key4, cnt Statistics: Num rows: 6 Data size: 612 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(key4, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(key4), max(key4), count(CASE WHEN (key4 is null) THEN (1) ELSE (null) END), compute_bit_vector(key4, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4169,34 +4221,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4347,19 +4403,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4410,34 +4466,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4595,19 +4655,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(CASE WHEN (key3 is null) THEN (1) ELSE (null) END), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4658,34 +4718,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4861,16 +4925,16 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -4915,46 +4979,54 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -5131,16 +5203,16 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -5185,46 +5257,54 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_test_1.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_test_1.q.out index a1ad55a9fb..8db3de94d4 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_test_1.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_test_1.q.out @@ -92,33 +92,37 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/hll.q.out b/ql/src/test/results/clientpositive/llap/hll.q.out index 0f2e13b1b2..501a6bd745 100644 --- a/ql/src/test/results/clientpositive/llap/hll.q.out +++ b/ql/src/test/results/clientpositive/llap/hll.q.out @@ -47,33 +47,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -161,33 +165,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/implicit_cast_during_insert.q.out b/ql/src/test/results/clientpositive/llap/implicit_cast_during_insert.q.out index c62a816b7a..4bd067d039 100644 --- a/ql/src/test/results/clientpositive/llap/implicit_cast_during_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/implicit_cast_during_insert.q.out @@ -77,18 +77,18 @@ STAGE PLANS: outputColumnNames: c1, c2, p1 Statistics: Num rows: 3 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll') keys: p1 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 411 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out index dc5ce6b965..0c9255f744 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out @@ -484,35 +484,35 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 316 Data size: 142516 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_grouping_operators.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_grouping_operators.q.out index 0b6952b861..0bf6efd7d5 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_grouping_operators.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_grouping_operators.q.out @@ -91,35 +91,35 @@ STAGE PLANS: outputColumnNames: key, value, agg, part Statistics: Num rows: 750 Data size: 335250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(agg)), avg(COALESCE(length(agg),0)), count(CASE WHEN (agg is null) THEN (1) ELSE (null) END), compute_bit_vector(agg, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1583,35 +1583,35 @@ STAGE PLANS: outputColumnNames: key, value, agg, part Statistics: Num rows: 1000 Data size: 447000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(agg)), avg(COALESCE(length(agg),0)), count(CASE WHEN (agg is null) THEN (1) ELSE (null) END), compute_bit_vector(agg, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1821,35 +1821,35 @@ STAGE PLANS: outputColumnNames: key, value, agg, part Statistics: Num rows: 500 Data size: 223500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(agg)), avg(COALESCE(length(agg),0)), count(CASE WHEN (agg is null) THEN (1) ELSE (null) END), compute_bit_vector(agg, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_map_operators.q.out index b6c7644a59..62b4492243 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_map_operators.q.out @@ -102,37 +102,37 @@ STAGE PLANS: outputColumnNames: key, value, part Statistics: Num rows: 316 Data size: 112496 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -328,35 +328,35 @@ STAGE PLANS: outputColumnNames: key, value, part Statistics: Num rows: 500 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -515,36 +515,36 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 208033 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '1' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '1' (type: string) null sort order: z sort order: + Map-reduce partition columns: '1' (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '1' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -720,35 +720,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 307 Data size: 110520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '1' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '1' (type: string) null sort order: z sort order: + Map-reduce partition columns: '1' (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '1' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_num_buckets.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_num_buckets.q.out index bab2ac51a9..eba172bf96 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_num_buckets.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_num_buckets.q.out @@ -161,12 +161,12 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 1000 Data size: 373000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '2008-04-08' (type: string), hr (type: string) minReductionHashAggr: 0.684 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 360872 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 316 Data size: 211720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: '2008-04-08' (type: string), _col1 (type: string) @@ -174,9 +174,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) - Statistics: Num rows: 316 Data size: 360872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 211720 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: false Reduce Output Operator bucketingVersion: 2 @@ -194,30 +194,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '2008-04-08' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 316 Data size: 190232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 316 Data size: 255328 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 255328 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/innerjoin.q.out b/ql/src/test/results/clientpositive/llap/innerjoin.q.out index 5355097c9c..18e4e31f23 100644 --- a/ql/src/test/results/clientpositive/llap/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/innerjoin.q.out @@ -104,31 +104,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input11.q.out b/ql/src/test/results/clientpositive/llap/input11.q.out index 6b6415f375..735aaa168a 100644 --- a/ql/src/test/results/clientpositive/llap/input11.q.out +++ b/ql/src/test/results/clientpositive/llap/input11.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input12.q.out b/ql/src/test/results/clientpositive/llap/input12.q.out index 66ac09aa05..08eb866428 100644 --- a/ql/src/test/results/clientpositive/llap/input12.q.out +++ b/ql/src/test/results/clientpositive/llap/input12.q.out @@ -87,16 +87,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -117,16 +117,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 5225 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (key >= 200) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE @@ -147,67 +147,75 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 166 Data size: 30544 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: '2008-04-08' (type: string), '12' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-04-08' (type: string), '12' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: '2008-04-08' (type: string), '12' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/input13.q.out b/ql/src/test/results/clientpositive/llap/input13.q.out index 62b0e61daf..42dc17c880 100644 --- a/ql/src/test/results/clientpositive/llap/input13.q.out +++ b/ql/src/test/results/clientpositive/llap/input13.q.out @@ -92,16 +92,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -122,16 +122,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 5225 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key >= 200) and (key < 300)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -152,19 +152,19 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 55 Data size: 10120 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: '2008-04-08' (type: string), '12' (type: string) minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-04-08' (type: string), '12' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary) Filter Operator predicate: (key >= 300) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE @@ -185,48 +185,56 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: '2008-04-08' (type: string), '12' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/input14.q.out b/ql/src/test/results/clientpositive/llap/input14.q.out index f3e5526fb8..add7aaa25f 100644 --- a/ql/src/test/results/clientpositive/llap/input14.q.out +++ b/ql/src/test/results/clientpositive/llap/input14.q.out @@ -89,31 +89,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input17.q.out b/ql/src/test/results/clientpositive/llap/input17.q.out index 307403980c..36e2b44dbb 100644 --- a/ql/src/test/results/clientpositive/llap/input17.q.out +++ b/ql/src/test/results/clientpositive/llap/input17.q.out @@ -86,31 +86,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 11 Data size: 29524 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3548 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3076 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3548 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 3076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3564 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3564 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3076 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 3076 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3076 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input18.q.out b/ql/src/test/results/clientpositive/llap/input18.q.out index 4623d8b3ac..17b7f0f4a7 100644 --- a/ql/src/test/results/clientpositive/llap/input18.q.out +++ b/ql/src/test/results/clientpositive/llap/input18.q.out @@ -89,31 +89,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 31208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input1_limit.q.out b/ql/src/test/results/clientpositive/llap/input1_limit.q.out index 0e9d0dec21..3a071f5675 100644 --- a/ql/src/test/results/clientpositive/llap/input1_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/input1_limit.q.out @@ -115,31 +115,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -167,31 +171,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input20.q.out b/ql/src/test/results/clientpositive/llap/input20.q.out index 819d888426..3877312b70 100644 --- a/ql/src/test/results/clientpositive/llap/input20.q.out +++ b/ql/src/test/results/clientpositive/llap/input20.q.out @@ -101,31 +101,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input30.q.out b/ql/src/test/results/clientpositive/llap/input30.q.out index 769df05dac..10668f5c76 100644 --- a/ql/src/test/results/clientpositive/llap/input30.q.out +++ b/ql/src/test/results/clientpositive/llap/input30.q.out @@ -86,17 +86,21 @@ STAGE PLANS: outputColumnNames: a Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -210,17 +214,17 @@ STAGE PLANS: outputColumnNames: a Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/input32.q.out b/ql/src/test/results/clientpositive/llap/input32.q.out index 690279d6f3..45325ffcdd 100644 --- a/ql/src/test/results/clientpositive/llap/input32.q.out +++ b/ql/src/test/results/clientpositive/llap/input32.q.out @@ -85,17 +85,21 @@ STAGE PLANS: outputColumnNames: a Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input33.q.out b/ql/src/test/results/clientpositive/llap/input33.q.out index daf57dea3a..6a707a9492 100644 --- a/ql/src/test/results/clientpositive/llap/input33.q.out +++ b/ql/src/test/results/clientpositive/llap/input33.q.out @@ -101,31 +101,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input34.q.out b/ql/src/test/results/clientpositive/llap/input34.q.out index 3f702cdf3b..7c783f6c0f 100644 --- a/ql/src/test/results/clientpositive/llap/input34.q.out +++ b/ql/src/test/results/clientpositive/llap/input34.q.out @@ -75,31 +75,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input35.q.out b/ql/src/test/results/clientpositive/llap/input35.q.out index 85e5412b47..7d6d1f0cba 100644 --- a/ql/src/test/results/clientpositive/llap/input35.q.out +++ b/ql/src/test/results/clientpositive/llap/input35.q.out @@ -75,31 +75,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input36.q.out b/ql/src/test/results/clientpositive/llap/input36.q.out index 8a8e30c1ea..004a081d36 100644 --- a/ql/src/test/results/clientpositive/llap/input36.q.out +++ b/ql/src/test/results/clientpositive/llap/input36.q.out @@ -75,31 +75,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input38.q.out b/ql/src/test/results/clientpositive/llap/input38.q.out index 161cda2ac7..c5342d8df3 100644 --- a/ql/src/test/results/clientpositive/llap/input38.q.out +++ b/ql/src/test/results/clientpositive/llap/input38.q.out @@ -69,31 +69,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input3_limit.q.out b/ql/src/test/results/clientpositive/llap/input3_limit.q.out index c9616de7a2..411147e834 100644 --- a/ql/src/test/results/clientpositive/llap/input3_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/input3_limit.q.out @@ -115,31 +115,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input5.q.out b/ql/src/test/results/clientpositive/llap/input5.q.out index ab091fe0df..214966aafc 100644 --- a/ql/src/test/results/clientpositive/llap/input5.q.out +++ b/ql/src/test/results/clientpositive/llap/input5.q.out @@ -86,31 +86,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 11 Data size: 29480 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 3144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3144 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 3144 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3144 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input6.q.out b/ql/src/test/results/clientpositive/llap/input6.q.out index 6565abd3b9..d0fee27403 100644 --- a/ql/src/test/results/clientpositive/llap/input6.q.out +++ b/ql/src/test/results/clientpositive/llap/input6.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input7.q.out b/ql/src/test/results/clientpositive/llap/input7.q.out index 88afecd8c3..36c097c407 100644 --- a/ql/src/test/results/clientpositive/llap/input7.q.out +++ b/ql/src/test/results/clientpositive/llap/input7.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: c1, c2 Statistics: Num rows: 25 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input8.q.out b/ql/src/test/results/clientpositive/llap/input8.q.out index 6e8c5a1135..52a54204d5 100644 --- a/ql/src/test/results/clientpositive/llap/input8.q.out +++ b/ql/src/test/results/clientpositive/llap/input8.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3 Statistics: Num rows: 25 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input9.q.out b/ql/src/test/results/clientpositive/llap/input9.q.out index dda00b30e1..97772a722d 100644 --- a/ql/src/test/results/clientpositive/llap/input9.q.out +++ b/ql/src/test/results/clientpositive/llap/input9.q.out @@ -61,33 +61,37 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_part1.q.out b/ql/src/test/results/clientpositive/llap/input_part1.q.out index acd0ac6af2..2cdefa745c 100644 --- a/ql/src/test/results/clientpositive/llap/input_part1.q.out +++ b/ql/src/test/results/clientpositive/llap/input_part1.q.out @@ -83,19 +83,19 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(CASE WHEN (hr is null) THEN (1) ELSE (null) END), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(CASE WHEN (ds is null) THEN (1) ELSE (null) END), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -146,34 +146,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_part10.q.out b/ql/src/test/results/clientpositive/llap/input_part10.q.out index dac65c50a4..499935ad50 100644 --- a/ql/src/test/results/clientpositive/llap/input_part10.q.out +++ b/ql/src/test/results/clientpositive/llap/input_part10.q.out @@ -92,35 +92,35 @@ STAGE PLANS: outputColumnNames: a, b, ds, ts Statistics: Num rows: 1 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') keys: ds (type: string), ts (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 654 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 654 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 518 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 722 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 722 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/input_part2.q.out b/ql/src/test/results/clientpositive/llap/input_part2.q.out index cb411ca417..6bd315a3ac 100644 --- a/ql/src/test/results/clientpositive/llap/input_part2.q.out +++ b/ql/src/test/results/clientpositive/llap/input_part2.q.out @@ -96,19 +96,19 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 333 Data size: 91575 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(CASE WHEN (hr is null) THEN (1) ELSE (null) END), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(CASE WHEN (ds is null) THEN (1) ELSE (null) END), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Filter Operator isSamplingPred: false @@ -149,19 +149,19 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 333 Data size: 91575 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(CASE WHEN (hr is null) THEN (1) ELSE (null) END), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(CASE WHEN (ds is null) THEN (1) ELSE (null) END), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -250,67 +250,75 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_part5.q.out b/ql/src/test/results/clientpositive/llap/input_part5.q.out index cf429ac3b8..8e0a87e6c2 100644 --- a/ql/src/test/results/clientpositive/llap/input_part5.q.out +++ b/ql/src/test/results/clientpositive/llap/input_part5.q.out @@ -62,33 +62,37 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 333 Data size: 151848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(CASE WHEN (hr is null) THEN (1) ELSE (null) END), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(CASE WHEN (ds is null) THEN (1) ELSE (null) END), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_testsequencefile.q.out b/ql/src/test/results/clientpositive/llap/input_testsequencefile.q.out index e882292886..2605dd70bf 100644 --- a/ql/src/test/results/clientpositive/llap/input_testsequencefile.q.out +++ b/ql/src/test/results/clientpositive/llap/input_testsequencefile.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_testxpath.q.out b/ql/src/test/results/clientpositive/llap/input_testxpath.q.out index f5f76d9a6d..25283aab92 100644 --- a/ql/src/test/results/clientpositive/llap/input_testxpath.q.out +++ b/ql/src/test/results/clientpositive/llap/input_testxpath.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: key, value, mapvalue Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(mapvalue, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(mapvalue)), avg(COALESCE(length(mapvalue),0)), count(CASE WHEN (mapvalue is null) THEN (1) ELSE (null) END), compute_bit_vector(mapvalue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4904 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 4224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 4904 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 4224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4920 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4920 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 4224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 4224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_testxpath2.q.out b/ql/src/test/results/clientpositive/llap/input_testxpath2.q.out index 017206a37a..75949d0b20 100644 --- a/ql/src/test/results/clientpositive/llap/input_testxpath2.q.out +++ b/ql/src/test/results/clientpositive/llap/input_testxpath2.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: lint_size, lintstring_size, mstringstring_size Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(lint_size, 'hll'), compute_stats(lintstring_size, 'hll'), compute_stats(mstringstring_size, 'hll') + aggregations: min(lint_size), max(lint_size), count(CASE WHEN (lint_size is null) THEN (1) ELSE (null) END), compute_bit_vector(lint_size, 'hll'), min(lintstring_size), max(lintstring_size), count(CASE WHEN (lintstring_size is null) THEN (1) ELSE (null) END), compute_bit_vector(lintstring_size, 'hll'), min(mstringstring_size), max(mstringstring_size), count(CASE WHEN (mstringstring_size is null) THEN (1) ELSE (null) END), compute_bit_vector(mstringstring_size, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4872 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 4080 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 4872 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 4080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4920 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4920 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 4080 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 4080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert1.q.out b/ql/src/test/results/clientpositive/llap/insert1.q.out index 13075a0574..684a28cefd 100644 --- a/ql/src/test/results/clientpositive/llap/insert1.q.out +++ b/ql/src/test/results/clientpositive/llap/insert1.q.out @@ -72,33 +72,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -169,33 +173,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -280,33 +288,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -377,33 +389,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -484,16 +500,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE @@ -514,48 +530,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert1_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/llap/insert1_overwrite_partitions.q.out index 09f292f936..60fe404415 100644 --- a/ql/src/test/results/clientpositive/llap/insert1_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/llap/insert1_overwrite_partitions.q.out @@ -106,11 +106,11 @@ STAGE PLANS: outputColumnNames: one, two, ds, hr Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(CASE WHEN (two is null) THEN (1) ELSE (null) END), compute_bit_vector(two, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -118,19 +118,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -311,31 +311,35 @@ STAGE PLANS: outputColumnNames: one, two Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(CASE WHEN (two is null) THEN (1) ELSE (null) END), compute_bit_vector(two, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 832 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 832 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1248 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 832 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 832 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 832 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -485,11 +489,11 @@ STAGE PLANS: outputColumnNames: one, two, ds, hr Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(CASE WHEN (two is null) THEN (1) ELSE (null) END), compute_bit_vector(two, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -497,19 +501,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/insert2_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/llap/insert2_overwrite_partitions.q.out index f94d6f6efb..ec81e69dc2 100644 --- a/ql/src/test/results/clientpositive/llap/insert2_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/llap/insert2_overwrite_partitions.q.out @@ -117,11 +117,11 @@ STAGE PLANS: outputColumnNames: one, two, ds Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(CASE WHEN (two is null) THEN (1) ELSE (null) END), compute_bit_vector(two, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -129,19 +129,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -277,11 +277,11 @@ STAGE PLANS: outputColumnNames: one, two, ds Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(CASE WHEN (two is null) THEN (1) ELSE (null) END), compute_bit_vector(two, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -289,19 +289,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/insert_into1.q.out b/ql/src/test/results/clientpositive/llap/insert_into1.q.out index 960bee71a0..05ba792e8f 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into1.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into1.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -257,31 +261,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -430,31 +438,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -574,33 +586,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -678,33 +694,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert_into2.q.out b/ql/src/test/results/clientpositive/llap/insert_into2.q.out index ba1a56b4b8..93a6874b31 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into2.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into2.q.out @@ -88,35 +88,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -314,35 +314,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -505,35 +505,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 50 Data size: 9000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.98 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/insert_into3.q.out b/ql/src/test/results/clientpositive/llap/insert_into3.q.out index 9c9820e661..4a8e0a525c 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into3.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into3.q.out @@ -119,31 +119,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 50 Data size: 4750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -171,31 +175,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -369,31 +377,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -421,31 +433,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert_into4.q.out b/ql/src/test/results/clientpositive/llap/insert_into4.q.out index f2e4bab82c..3eff5bc90b 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into4.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into4.q.out @@ -92,31 +92,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -232,31 +236,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -346,33 +354,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert_into5.q.out b/ql/src/test/results/clientpositive/llap/insert_into5.q.out index 2573fd2591..3004d014fe 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into5.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into5.q.out @@ -88,31 +88,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -202,33 +206,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -320,37 +328,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 20 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -452,37 +460,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 20 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/insert_into6.q.out b/ql/src/test/results/clientpositive/llap/insert_into6.q.out index fabcf410ca..a182eab807 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into6.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into6.q.out @@ -94,35 +94,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 150 Data size: 27000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -237,37 +237,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2096 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 2096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 2 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 1016 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1428 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1428 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out b/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out index bf8270de8c..b99aa17715 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out @@ -82,31 +82,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -225,31 +229,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -368,31 +376,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -512,31 +524,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -655,31 +671,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -816,31 +836,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -969,31 +993,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1112,31 +1140,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1255,31 +1287,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1399,31 +1435,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1542,31 +1582,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1685,31 +1729,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1828,31 +1876,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2214,37 +2266,37 @@ STAGE PLANS: outputColumnNames: i, j, ds Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(j), max(j), count(CASE WHEN (j is null) THEN (1) ELSE (null) END), compute_bit_vector(j, 'hll') keys: ds (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2352,37 +2404,37 @@ STAGE PLANS: outputColumnNames: i, j, ds Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(j), max(j), count(CASE WHEN (j is null) THEN (1) ELSE (null) END), compute_bit_vector(j, 'hll') keys: ds (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2473,37 +2525,37 @@ STAGE PLANS: outputColumnNames: i, j, ds Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(CASE WHEN (i is null) THEN (1) ELSE (null) END), compute_bit_vector(i, 'hll'), min(j), max(j), count(CASE WHEN (j is null) THEN (1) ELSE (null) END), compute_bit_vector(j, 'hll') keys: ds (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2736,31 +2788,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(CASE WHEN (a1 is null) THEN (1) ELSE (null) END), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3030,31 +3086,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(CASE WHEN (a1 is null) THEN (1) ELSE (null) END), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3330,31 +3390,35 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out index 40fc7f9cec..8c08bc17fd 100644 --- a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out @@ -100,35 +100,35 @@ STAGE PLANS: outputColumnNames: name, age, gpa, year Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(name, 'hll'), compute_stats(age, 'hll'), compute_stats(gpa, 'hll') + aggregations: max(length(name)), avg(COALESCE(length(name),0)), count(CASE WHEN (name is null) THEN (1) ELSE (null) END), compute_bit_vector(name, 'hll'), min(age), max(age), count(CASE WHEN (age is null) THEN (1) ELSE (null) END), compute_bit_vector(age, 'hll'), min(gpa), max(gpa), count(CASE WHEN (gpa is null) THEN (1) ELSE (null) END), compute_bit_vector(gpa, 'hll') keys: year (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 772 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 772 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: decimal(3,2)), _col10 (type: decimal(3,2)), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DECIMAL' (type: string), _col9 (type: decimal(3,2)), _col10 (type: decimal(3,2)), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 1009 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1009 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/llap/insertoverwrite_bucket.q.out index 54a7ac0d49..c2cfcabdc1 100644 --- a/ql/src/test/results/clientpositive/llap/insertoverwrite_bucket.q.out +++ b/ql/src/test/results/clientpositive/llap/insertoverwrite_bucket.q.out @@ -158,31 +158,35 @@ STAGE PLANS: outputColumnNames: change, num Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(change, 'hll'), compute_stats(num, 'hll') + aggregations: max(length(change)), avg(COALESCE(length(change),0)), count(CASE WHEN (change is null) THEN (1) ELSE (null) END), compute_bit_vector(change, 'hll'), max(length(num)), avg(COALESCE(length(num),0)), count(CASE WHEN (num is null) THEN (1) ELSE (null) END), compute_bit_vector(num, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -292,31 +296,35 @@ STAGE PLANS: outputColumnNames: create_ts, change, num Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(create_ts, 'hll'), compute_stats(change, 'hll'), compute_stats(num, 'hll') + aggregations: max(length(create_ts)), avg(COALESCE(length(create_ts),0)), count(CASE WHEN (create_ts is null) THEN (1) ELSE (null) END), compute_bit_vector(create_ts, 'hll'), max(length(change)), avg(COALESCE(length(change),0)), count(CASE WHEN (change is null) THEN (1) ELSE (null) END), compute_bit_vector(change, 'hll'), max(length(num)), avg(COALESCE(length(num),0)), count(CASE WHEN (num is null) THEN (1) ELSE (null) END), compute_bit_vector(num, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join1.q.out b/ql/src/test/results/clientpositive/llap/join1.q.out index 5d26bbe9c1..0188c8cdd5 100644 --- a/ql/src/test/results/clientpositive/llap/join1.q.out +++ b/ql/src/test/results/clientpositive/llap/join1.q.out @@ -104,31 +104,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join14.q.out b/ql/src/test/results/clientpositive/llap/join14.q.out index dfe667f472..cd2580dac1 100644 --- a/ql/src/test/results/clientpositive/llap/join14.q.out +++ b/ql/src/test/results/clientpositive/llap/join14.q.out @@ -110,31 +110,35 @@ STAGE PLANS: outputColumnNames: c1, c2 Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join17.q.out b/ql/src/test/results/clientpositive/llap/join17.q.out index e432fb8019..8122228234 100644 --- a/ql/src/test/results/clientpositive/llap/join17.q.out +++ b/ql/src/test/results/clientpositive/llap/join17.q.out @@ -218,53 +218,57 @@ STAGE PLANS: outputColumnNames: key1, value1, key2, value2 Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + aggregations: min(key1), max(key1), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), min(key2), max(key2), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join25.q.out b/ql/src/test/results/clientpositive/llap/join25.q.out index 8d72e5a999..c41b908a72 100644 --- a/ql/src/test/results/clientpositive/llap/join25.q.out +++ b/ql/src/test/results/clientpositive/llap/join25.q.out @@ -99,33 +99,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 39 Data size: 7176 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.974359 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join26.q.out b/ql/src/test/results/clientpositive/llap/join26.q.out index 71a0af2324..2fcd09054a 100644 --- a/ql/src/test/results/clientpositive/llap/join26.q.out +++ b/ql/src/test/results/clientpositive/llap/join26.q.out @@ -257,19 +257,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 61 Data size: 16348 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9836066 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -320,34 +320,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join27.q.out b/ql/src/test/results/clientpositive/llap/join27.q.out index 4328cd6bd6..2aae45f95c 100644 --- a/ql/src/test/results/clientpositive/llap/join27.q.out +++ b/ql/src/test/results/clientpositive/llap/join27.q.out @@ -99,33 +99,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 40 Data size: 7360 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.975 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join28.q.out b/ql/src/test/results/clientpositive/llap/join28.q.out index 485158dc21..673af6ed35 100644 --- a/ql/src/test/results/clientpositive/llap/join28.q.out +++ b/ql/src/test/results/clientpositive/llap/join28.q.out @@ -140,33 +140,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9836066 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join29.q.out b/ql/src/test/results/clientpositive/llap/join29.q.out index b2df356f2e..f8998037cd 100644 --- a/ql/src/test/results/clientpositive/llap/join29.q.out +++ b/ql/src/test/results/clientpositive/llap/join29.q.out @@ -142,31 +142,35 @@ STAGE PLANS: outputColumnNames: key, cnt1, cnt2 Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt1, 'hll'), compute_stats(cnt2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt1), max(cnt1), count(CASE WHEN (cnt1 is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt1, 'hll'), min(cnt2), max(cnt2), count(CASE WHEN (cnt2 is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt2, 'hll') minReductionHashAggr: 0.9166667 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join3.q.out b/ql/src/test/results/clientpositive/llap/join3.q.out index 237c0a3690..36b0e07258 100644 --- a/ql/src/test/results/clientpositive/llap/join3.q.out +++ b/ql/src/test/results/clientpositive/llap/join3.q.out @@ -143,31 +143,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join30.q.out b/ql/src/test/results/clientpositive/llap/join30.q.out index 0a79ce5848..d515c733c2 100644 --- a/ql/src/test/results/clientpositive/llap/join30.q.out +++ b/ql/src/test/results/clientpositive/llap/join30.q.out @@ -122,31 +122,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join31.q.out b/ql/src/test/results/clientpositive/llap/join31.q.out index 20914a471d..207fd6eff6 100644 --- a/ql/src/test/results/clientpositive/llap/join31.q.out +++ b/ql/src/test/results/clientpositive/llap/join31.q.out @@ -150,31 +150,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join32.q.out b/ql/src/test/results/clientpositive/llap/join32.q.out index 20db9a3bbd..3d89bf1824 100644 --- a/ql/src/test/results/clientpositive/llap/join32.q.out +++ b/ql/src/test/results/clientpositive/llap/join32.q.out @@ -258,19 +258,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -321,34 +321,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index 106050f45f..353acd3749 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -266,19 +266,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -329,34 +329,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -839,19 +843,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.989899 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -897,34 +901,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1326,19 +1334,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1389,34 +1397,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1807,19 +1819,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1894,34 +1906,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2217,33 +2233,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2514,33 +2534,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join33.q.out b/ql/src/test/results/clientpositive/llap/join33.q.out index bfbb4ba310..ab768f4a24 100644 --- a/ql/src/test/results/clientpositive/llap/join33.q.out +++ b/ql/src/test/results/clientpositive/llap/join33.q.out @@ -258,19 +258,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -321,34 +321,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join34.q.out b/ql/src/test/results/clientpositive/llap/join34.q.out index a58ee4af3d..cdcda8da1b 100644 --- a/ql/src/test/results/clientpositive/llap/join34.q.out +++ b/ql/src/test/results/clientpositive/llap/join34.q.out @@ -121,19 +121,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9782609 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -236,19 +236,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9782609 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -370,34 +370,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/join35.q.out b/ql/src/test/results/clientpositive/llap/join35.q.out index cd435a606c..6e16c8e943 100644 --- a/ql/src/test/results/clientpositive/llap/join35.q.out +++ b/ql/src/test/results/clientpositive/llap/join35.q.out @@ -330,53 +330,57 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), min(val2), max(val2), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9782609 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 6 Execution mode: llap Needs Tagging: false @@ -434,19 +438,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), min(val2), max(val2), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9782609 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/join36.q.out b/ql/src/test/results/clientpositive/llap/join36.q.out index 8071b04748..b4ad05aa1e 100644 --- a/ql/src/test/results/clientpositive/llap/join36.q.out +++ b/ql/src/test/results/clientpositive/llap/join36.q.out @@ -117,16 +117,16 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 315 Data size: 3780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), min(val2), max(val2), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -155,17 +155,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join37.q.out b/ql/src/test/results/clientpositive/llap/join37.q.out index 3c09084c9d..a1a9a0c010 100644 --- a/ql/src/test/results/clientpositive/llap/join37.q.out +++ b/ql/src/test/results/clientpositive/llap/join37.q.out @@ -99,33 +99,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 39 Data size: 7176 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.974359 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join39.q.out b/ql/src/test/results/clientpositive/llap/join39.q.out index a2bc64d1b8..1865c8f6d2 100644 --- a/ql/src/test/results/clientpositive/llap/join39.q.out +++ b/ql/src/test/results/clientpositive/llap/join39.q.out @@ -67,16 +67,16 @@ STAGE PLANS: outputColumnNames: key, value, key1, val2 Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(key1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key1)), avg(COALESCE(length(key1),0)), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -105,17 +105,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join4.q.out b/ql/src/test/results/clientpositive/llap/join4.q.out index 7eae3ae6d7..50df781822 100644 --- a/ql/src/test/results/clientpositive/llap/join4.q.out +++ b/ql/src/test/results/clientpositive/llap/join4.q.out @@ -127,31 +127,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join5.q.out b/ql/src/test/results/clientpositive/llap/join5.q.out index dce3d2eb06..7e5430825a 100644 --- a/ql/src/test/results/clientpositive/llap/join5.q.out +++ b/ql/src/test/results/clientpositive/llap/join5.q.out @@ -127,31 +127,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join6.q.out b/ql/src/test/results/clientpositive/llap/join6.q.out index e20d166590..208c749da3 100644 --- a/ql/src/test/results/clientpositive/llap/join6.q.out +++ b/ql/src/test/results/clientpositive/llap/join6.q.out @@ -127,31 +127,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join7.q.out b/ql/src/test/results/clientpositive/llap/join7.q.out index 2f4c862200..e9ad688579 100644 --- a/ql/src/test/results/clientpositive/llap/join7.q.out +++ b/ql/src/test/results/clientpositive/llap/join7.q.out @@ -170,31 +170,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6 Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), max(length(c6)), avg(COALESCE(length(c6),0)), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: struct), _col22 (type: bigint), _col23 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), max(VALUE._col20), avg(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col20,0)) (type: bigint), COALESCE(_col21,0) (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join8.q.out b/ql/src/test/results/clientpositive/llap/join8.q.out index a7f500baa3..ccdc12f9a7 100644 --- a/ql/src/test/results/clientpositive/llap/join8.q.out +++ b/ql/src/test/results/clientpositive/llap/join8.q.out @@ -130,31 +130,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join9.q.out b/ql/src/test/results/clientpositive/llap/join9.q.out index 2ce0126eb0..7cba0cf336 100644 --- a/ql/src/test/results/clientpositive/llap/join9.q.out +++ b/ql/src/test/results/clientpositive/llap/join9.q.out @@ -226,53 +226,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown_negative.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown_negative.q.out index 49b7a6b2df..89582b666b 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown_negative.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown_negative.q.out @@ -306,16 +306,16 @@ STAGE PLANS: outputColumnNames: key, c1 Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -335,17 +335,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -373,31 +377,35 @@ STAGE PLANS: outputColumnNames: key, c1 Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/lineage1.q.out b/ql/src/test/results/clientpositive/llap/lineage1.q.out index cff56bb936..33176d7056 100644 --- a/ql/src/test/results/clientpositive/llap/lineage1.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage1.q.out @@ -164,31 +164,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 128 Data size: 5426 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -221,16 +225,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 128 Data size: 5426 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/lineage2.q.out b/ql/src/test/results/clientpositive/llap/lineage2.q.out index 6576a8468c..85dc772009 100644 --- a/ql/src/test/results/clientpositive/llap/lineage2.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage2.q.out @@ -5,7 +5,8 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src1 PREHOOK: Output: database:default PREHOOK: Output: default@src2 -{"version":"1.0","engine":"tez","database":"default","hash":"87921246fb098d44c05e0ccd9ecb0676","queryText":"create table src2 as select key key2, value value2 from src1","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.src2.value2"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"87921246fb098d44c05e0ccd9ecb0676","queryText":"create table src2 as select key key2, value value2 from src1","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.src2.value2"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} PREHOOK: query: select * from src1 where key is not null and value is not null limit 3 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -31,12 +32,14 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src1 PREHOOK: Output: database:default PREHOOK: Output: default@dest1_n56 -{"version":"1.0","engine":"tez","database":"default","hash":"01251b1a2a539f7bb1d533cf6a9de47d","queryText":"create table dest1_n56 as select * from src1","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1_n56.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1_n56.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"01251b1a2a539f7bb1d533cf6a9de47d","queryText":"create table dest1_n56 as select * from src1","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1_n56.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1_n56.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} PREHOOK: query: insert into table dest1_n56 select * from src2 PREHOOK: type: QUERY PREHOOK: Input: default@src2 PREHOOK: Output: default@dest1_n56 -{"version":"1.0","engine":"tez","database":"default","hash":"d3d379a20e27c1618037bd6b8e840b13","queryText":"insert into table dest1_n56 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1_n56.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1_n56.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"d3d379a20e27c1618037bd6b8e840b13","queryText":"insert into table dest1_n56 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1_n56.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1_n56.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select key k, dest1_n56.value from dest1_n56 PREHOOK: type: QUERY PREHOOK: Input: default@dest1_n56 @@ -461,26 +464,30 @@ PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: database:default PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"7e2a275cdee3a519d901b7b178eefcd7","queryText":"create table dest2_n11 as select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"7e2a275cdee3a519d901b7b178eefcd7","queryText":"create table dest2_n11 as select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert overwrite table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"b275c2987a11e52fcecb46cfee2fb17e","queryText":"insert overwrite table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"b275c2987a11e52fcecb46cfee2fb17e","queryText":"insert overwrite table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"cabe07848c79ab95f0937586e75ad64e","queryText":"insert into table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"cabe07848c79ab95f0937586e75ad64e","queryText":"insert into table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2_n11 select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"4c13fe982c4d22e5735ba469dee4b3d8","queryText":"insert into table dest2_n11\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"length(src1.value) is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"length(src2.value2) is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"4c13fe982c4d22e5735ba469dee4b3d8","queryText":"insert into table dest2_n11\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"length(src1.value) is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"length(src2.value2) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select * from src1 where length(key) > 2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -523,14 +530,16 @@ PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: database:default PREHOOK: Output: default@dest3_n0 -{"version":"1.0","engine":"tez","database":"default","hash":"04c85db3424d79a3663c0532bc1e0a35","queryText":"create table dest3_n0 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 1) and src2.key2 is not null)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3_n0.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3_n0.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3_n0.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3_n0.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"04c85db3424d79a3663c0532bc1e0a35","queryText":"create table dest3_n0 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 1) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3_n0.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3_n0.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3_n0.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3_n0.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert overwrite table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"fb315308480b6e64466a6db5246895d6","queryText":"insert overwrite table dest2_n11\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"fb315308480b6e64466a6db5246895d6","queryText":"insert overwrite table dest2_n11\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1_n0 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1_n0(key INT, value STRING) STORED AS TEXTFILE @@ -552,7 +561,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_l1_n0 -{"version":"1.0","engine":"tez","database":"default","hash":"40b5d904f13549d8c25bd0be758f5b6f","queryText":"INSERT OVERWRITE TABLE dest_l1_n0\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"p1.key is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"p2.key is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(j.key), 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(j.value, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n0.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n0.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"40b5d904f13549d8c25bd0be758f5b6f","queryText":"INSERT OVERWRITE TABLE dest_l1_n0\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"p1.key is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"p2.key is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n0.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n0.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept_n10 @@ -593,7 +603,8 @@ PREHOOK: Input: default@dept_n10 PREHOOK: Input: default@emp PREHOOK: Input: default@project_n10 PREHOOK: Output: default@tgt_n10 -{"version":"1.0","engine":"tez","database":"default","hash":"bd297ef302d63c60b0bfb692af732b04","queryText":"INSERT INTO TABLE tgt_n10\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept_n10 d ON d.dept_id = em.dept_id\n ) emd JOIN project_n10 p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept_n10.dept_name, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 'hll')","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project_n10.project_id, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project_n10.project_name, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt_n10.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt_n10.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt_n10.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt_n10.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project_n10.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project_n10.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_id"}]} +Result schema has 6 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"bd297ef302d63c60b0bfb692af732b04","queryText":"INSERT INTO TABLE tgt_n10\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept_n10 d ON d.dept_id = em.dept_id\n ) emd JOIN project_n10 p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt_n10.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt_n10.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt_n10.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt_n10.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project_n10.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project_n10.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -604,7 +615,8 @@ PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dest_l2 -{"version":"1.0","engine":"tez","database":"default","hash":"f9a01e400eb50cc3c5ec0741ed20994c","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0,1],"expression":"col1","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToByte(col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[0,1],"expression":"compute_stats(col1, 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(UDFToByte(col1), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToLong(col1), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"f9a01e400eb50cc3c5ec0741ed20994c","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0,1],"expression":"col1","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToByte(col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(col1)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} PREHOOK: query: select * from ( select c1 + c2 x from dest_l2 union all @@ -625,7 +637,8 @@ PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dest_l3 -{"version":"1.0","engine":"tez","database":"default","hash":"9f432e7641bec615db3eb365daa3eeae","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0,1,2,3],"expression":"col1","edgeType":"PROJECTION"},{"sources":[],"targets":[0,1,2,3],"expression":"compute_stats(col1, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"9f432e7641bec615db3eb365daa3eeae","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0,1,2,3],"expression":"col1","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id) from dest_l2 a where a.c2 != 10 @@ -659,7 +672,8 @@ PREHOOK: Input: default@dest_l2 PREHOOK: Input: default@dest_l3 PREHOOK: Output: database:default PREHOOK: Output: default@t_n10 -{"version":"1.0","engine":"tez","database":"default","hash":"1a18373814a0ccf82ee1409db6a912b5","queryText":"create table t_n10 as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"(a.id > 0)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1],"expression":"((b.id > 0) and (b.c3 = 15))","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.dest_l2.c2, 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.dest_l2.c3, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t_n10.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t_n10.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"1a18373814a0ccf82ee1409db6a912b5","queryText":"create table t_n10 as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"(a.id > 0)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1],"expression":"((b.id > 0) and (b.c3 = 15))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t_n10.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t_n10.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: SELECT substr(src1.key,1,1), count(DISTINCT substr(src1.value,5)), concat(substr(src1.key,1,1),sum(substr(src1.value,5))) from src1 @@ -704,4 +718,5 @@ from relations lateral view explode(ep1_ids) rel1 as ep1_id PREHOOK: type: QUERY PREHOOK: Input: default@relations PREHOOK: Output: default@rels_exploded -{"version":"1.0","engine":"tez","database":"default","hash":"56b2b197f394a30537ce1acf835ff8e1","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32))","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32))","edgeType":"PROJECTION"},{"sources":[8],"targets":[0],"expression":"compute_stats(default.relations.identity, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"expression":"compute_stats(default.relations.type, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"expression":"compute_stats(default.relations.ep1_src_type, 'hll')","edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"expression":"compute_stats(default.relations.ep1_type, 'hll')","edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"expression":"compute_stats(default.relations.ep2_src_type, 'hll')","edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"expression":"compute_stats(default.relations.ep2_type, 'hll')","edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"compute_stats(CAST( rel1._col11 AS CHAR(32)), 'hll')","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"compute_stats(CAST( rel2._col12 AS CHAR(32)), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} +Result schema has 8 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"56b2b197f394a30537ce1acf835ff8e1","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32))","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32))","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out index d762fbc049..c87d7c0c92 100644 --- a/ql/src/test/results/clientpositive/llap/lineage3.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -10,7 +10,8 @@ insert into table d1 select x + length(y) PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 -{"version":"1.0","engine":"tez","database":"default","hash":"a1d51634883428cbc72084be0ec2e641","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t_n20\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"UDFToLong(a.cint) is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[1,2],"targets":[0],"expression":"compute_stats((UDFToInteger(a.ctinyint) + length(b.cstring1)), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"a1d51634883428cbc72084be0ec2e641","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t_n20\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"UDFToLong(a.cint) is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: drop table if exists d2 PREHOOK: type: DROPTABLE PREHOOK: query: create table d2(b varchar(128)) @@ -25,7 +26,9 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 PREHOOK: Output: default@d2 -{"version":"1.0","engine":"tez","database":"default","hash":"84e3cdc38011da5842162df175b2a494","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t_n20\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"UDFToLong(a.cint) is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t_n20.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1,0],"expression":"(t_n20.x > 0Y)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(x), 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[0],"expression":"compute_stats(CAST( y AS varchar(128)), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +Result schema has 1 fields, but we don't get as many dependencies +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"84e3cdc38011da5842162df175b2a494","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t_n20\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"UDFToLong(a.cint) is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t_n20.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t_n20.x > 0Y)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: drop table if exists t_n20 PREHOOK: type: DROPTABLE PREHOOK: query: create table t_n20 as @@ -36,7 +39,8 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src1 PREHOOK: Output: database:default PREHOOK: Output: default@t_n20 -{"version":"1.0","engine":"tez","database":"default","hash":"5a2daa3d8508025880412b524351c849","queryText":"create table t_n20 as\nselect * from\n (select * from\n (select key from src1 limit 1) v1) v2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[1],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t_n20.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.src1.key"}]} +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"5a2daa3d8508025880412b524351c849","queryText":"create table t_n20 as\nselect * from\n (select * from\n (select key from src1 limit 1) v1) v2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t_n20.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.src1.key"}]} PREHOOK: query: drop table if exists dest_l1_n2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l1_n2(a int, b varchar(128)) @@ -51,7 +55,8 @@ where cint is not null and cint < 0 order by cint, cs limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1_n2@ds=today -{"version":"1.0","engine":"tez","database":"default","hash":"b56115e94fe07fda7b4d2ffecf57adc6","queryText":"insert into table dest_l1_n2 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1,2],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(default.alltypesorc.cint, 'hll')","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( alltypesorc.cstring1 AS varchar(128)), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'today'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"b56115e94fe07fda7b4d2ffecf57adc6","queryText":"insert into table dest_l1_n2 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: insert into table dest_l1_n2 partition (ds='tomorrow') select min(cint), cast(min(cstring1) as varchar(128)) as cs from alltypesorc @@ -61,7 +66,8 @@ having min(cbigint) > 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1_n2@ds=tomorrow -{"version":"1.0","engine":"tez","database":"default","hash":"53b7b48554f009345159739b3ab04fa1","queryText":"insert into table dest_l1_n2 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[3],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[5,3],"targets":[0,1,2],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2],"expression":"(min(default.alltypesorc.cbigint) > 10L)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(min(default.alltypesorc.cint), 'hll')","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( min(default.alltypesorc.cstring1) AS varchar(128)), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'tomorrow'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"53b7b48554f009345159739b3ab04fa1","queryText":"insert into table dest_l1_n2 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[2],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"(min(default.alltypesorc.cbigint) > 10L)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: select cint, rank() over(order by cint) from alltypesorc where cint > 10 and cint < 10000 limit 10 PREHOOK: type: QUERY @@ -348,12 +354,14 @@ PREHOOK: query: insert into dest_dp1 partition (year) select first, word, year f PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp1 -{"version":"1.0","engine":"tez","database":"default","hash":"8d922f2fb420d3dffd87766f09123ccc","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[6],"targets":[4],"expression":"compute_stats(default.src_dp.first, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[5],"expression":"compute_stats(default.src_dp.word, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"first"},{"id":5,"vertexType":"COLUMN","vertexId":"word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} +Result schema has 3 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"8d922f2fb420d3dffd87766f09123ccc","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp2 -{"version":"1.0","engine":"tez","database":"default","hash":"8fae561192d76da429955aebc0fd87f9","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[11],"targets":[4,5],"edgeType":"PROJECTION"},{"sources":[8],"targets":[6],"expression":"compute_stats(default.src_dp.first, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[7],"expression":"compute_stats(default.src_dp.word, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":5,"vertexType":"COLUMN","vertexId":"month"},{"id":6,"vertexType":"COLUMN","vertexId":"first"},{"id":7,"vertexType":"COLUMN","vertexId":"word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":9,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":10,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"8fae561192d76da429955aebc0fd87f9","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} PREHOOK: query: insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0 PREHOOK: type: QUERY PREHOOK: Input: default@src_dp @@ -387,7 +395,9 @@ PREHOOK: Output: default@dest_dp1@year=0 PREHOOK: Output: default@dest_dp2 PREHOOK: Output: default@dest_dp2@y=1 PREHOOK: Output: default@dest_dp3@y=2 -Failed to log lineage graph, query is not affected -java.lang.IndexOutOfBoundsException: Index: 2, Size: 2 -#### A masked pattern was here #### - +Result schema has 2 fields, but we don't get as many dependencies +Result schema has 2 fields, but we don't get as many dependencies +Result schema has 2 fields, but we don't get as many dependencies +Result schema has 2 fields, but we don't get as many dependencies +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"e540a88155ffa4bf6842a4fdf3bfe639","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_1.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_1.q.out index fa0d8f0ed8..09f45fcb66 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_1.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_1.q.out @@ -87,12 +87,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -100,9 +100,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -191,30 +191,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out index a22819003b..129de8d8f0 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out @@ -86,12 +86,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 500 Data size: 179000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -99,9 +99,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -147,30 +147,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_11.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_11.q.out index 4fe1310685..9ead258f9f 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_11.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_11.q.out @@ -86,12 +86,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 500 Data size: 179000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -99,9 +99,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -147,30 +147,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_12.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_12.q.out index 9b2ec89b48..82ce304cbb 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_12.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_12.q.out @@ -86,12 +86,12 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4, col5, ds, hr Statistics: Num rows: 500 Data size: 306500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll'), max(length(col4)), avg(COALESCE(length(col4),0)), count(CASE WHEN (col4 is null) THEN (1) ELSE (null) END), compute_bit_vector(col4, 'hll'), max(length(col5)), avg(COALESCE(length(col5),0)), count(CASE WHEN (col5 is null) THEN (1) ELSE (null) END), compute_bit_vector(col5, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Statistics: Num rows: 1 Data size: 1340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -99,9 +99,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1340 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary), _col14 (type: int), _col15 (type: struct), _col16 (type: bigint), _col17 (type: binary), _col18 (type: int), _col19 (type: struct), _col20 (type: bigint), _col21 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -147,30 +147,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), max(VALUE._col16), avg(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col14,0)) (type: bigint), COALESCE(_col15,0) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col18,0)) (type: bigint), COALESCE(_col19,0) (type: double), _col20 (type: bigint), COALESCE(ndv_compute_bit_vector(_col21),0) (type: bigint), _col21 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1510 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1510 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types struct:struct:struct:struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29,_col30,_col31 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_13.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_13.q.out index ac035e3203..13e107dea5 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_13.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_13.q.out @@ -86,12 +86,12 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4, col5, ds, hr Statistics: Num rows: 500 Data size: 315000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll'), max(length(col4)), avg(COALESCE(length(col4),0)), count(CASE WHEN (col4 is null) THEN (1) ELSE (null) END), compute_bit_vector(col4, 'hll'), max(length(col5)), avg(COALESCE(length(col5),0)), count(CASE WHEN (col5 is null) THEN (1) ELSE (null) END), compute_bit_vector(col5, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Statistics: Num rows: 1 Data size: 1357 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -99,9 +99,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1357 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary), _col14 (type: int), _col15 (type: struct), _col16 (type: bigint), _col17 (type: binary), _col18 (type: int), _col19 (type: struct), _col20 (type: bigint), _col21 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -147,30 +147,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), max(VALUE._col16), avg(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Statistics: Num rows: 1 Data size: 1017 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col14,0)) (type: bigint), COALESCE(_col15,0) (type: double), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col18,0)) (type: bigint), COALESCE(_col19,0) (type: double), _col20 (type: bigint), COALESCE(ndv_compute_bit_vector(_col21),0) (type: bigint), _col21 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1527 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1527 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types struct:struct:struct:struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29,_col30,_col31 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_14.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_14.q.out index e79a6e7a58..b7059eab19 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_14.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_14.q.out @@ -77,19 +77,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -135,34 +135,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_2.q.out index dcae664bb5..447b6a824c 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_2.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_2.q.out @@ -92,12 +92,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -105,9 +105,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -196,30 +196,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_3.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_3.q.out index a36d0b8e50..f1b1b5c305 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_3.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_3.q.out @@ -82,12 +82,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -95,9 +95,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -186,30 +186,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_4.q.out index 68144f03f1..bf38a29d5f 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_4.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_4.q.out @@ -92,12 +92,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -105,9 +105,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -196,30 +196,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -413,12 +413,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -426,9 +426,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -517,30 +517,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_5.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_5.q.out index e5cb4f95e6..855e08e71e 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_5.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_5.q.out @@ -87,12 +87,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -100,9 +100,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -191,30 +191,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out index dd1e97bec2..368f0c5524 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out @@ -91,12 +91,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,9 +104,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -195,30 +195,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -456,12 +456,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,9 +469,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -560,30 +560,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out index 87cb08fe12..f87c7ddd25 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out @@ -91,12 +91,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,9 +104,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -195,30 +195,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -456,12 +456,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,9 +469,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -560,30 +560,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_9.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_9.q.out index 1938bfbf4e..b53be4eda6 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_9.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_9.q.out @@ -92,12 +92,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -105,9 +105,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -196,30 +196,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -413,12 +413,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -426,9 +426,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -517,30 +517,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out b/ql/src/test/results/clientpositive/llap/llap_stats.q.out index f2b9cd3479..933324d79f 100644 --- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out @@ -159,37 +159,37 @@ STAGE PLANS: outputColumnNames: ctinyint, csmallint, cint Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(ctinyint, 'hll'), compute_stats(csmallint, 'hll') + aggregations: min(ctinyint), max(ctinyint), count(CASE WHEN (ctinyint is null) THEN (1) ELSE (null) END), compute_bit_vector(ctinyint, 'hll'), min(csmallint), max(csmallint), count(CASE WHEN (csmallint is null) THEN (1) ELSE (null) END), compute_bit_vector(csmallint, 'hll') keys: cint (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 4260 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 5 Data size: 1620 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 4260 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 5 Data size: 1620 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: tinyint), _col2 (type: tinyint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: smallint), _col6 (type: smallint), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 4420 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 5 Data size: 1620 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 4420 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 5 Data size: 2660 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 4420 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2660 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out index c7219f624c..d669b9b445 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out @@ -105,19 +105,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE @@ -138,37 +138,37 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '2008-12-31' (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-12-31' (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -177,18 +177,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '2008-12-31' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part10.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part10.q.out index f9ced8e400..68edb5831c 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part10.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part10.q.out @@ -85,37 +85,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part13.q.out index 708720eb50..4622592732 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part13.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part13.q.out @@ -100,19 +100,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 221 Data size: 79118 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -141,37 +141,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 221 Data size: 79118 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out index cf08318e71..a1752f3265 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out @@ -136,35 +136,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: value (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 634 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 2 Data size: 634 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 498 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -192,19 +192,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: value (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 634 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 2 Data size: 634 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -228,19 +228,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: value (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 634 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 2 Data size: 634 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out index 6bb88e25b9..381438fa1b 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out @@ -98,35 +98,35 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 2000 Data size: 912000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part4.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part4.q.out index 48ca6a59f3..0e39ea4dd1 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part4.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part4.q.out @@ -97,37 +97,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 4 Data size: 3328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 4 Data size: 3328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 4 Data size: 2784 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 4 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out index 22bb823c54..012036f606 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out @@ -58,19 +58,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: value (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 80750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 250 Data size: 80750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -84,18 +84,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 63750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 89250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 89250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part8.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part8.q.out index f46a6bcce2..ffaada4e91 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part8.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part8.q.out @@ -119,12 +119,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -132,9 +132,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Filter Operator isSamplingPred: false @@ -178,12 +178,12 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '2008-12-31' (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: '2008-12-31' (type: string), _col1 (type: string) @@ -191,9 +191,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -358,30 +358,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -396,30 +396,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '2008-12-31' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part9.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part9.q.out index 451a0a2ddf..563c717acd 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part9.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part9.q.out @@ -85,37 +85,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 546000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/mapreduce1.q.out b/ql/src/test/results/clientpositive/llap/mapreduce1.q.out index 57654464da..5105831113 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce1.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce1.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(CASE WHEN (ten is null) THEN (1) ELSE (null) END), compute_bit_vector(ten, 'hll'), min(one), max(one), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce2.q.out b/ql/src/test/results/clientpositive/llap/mapreduce2.q.out index 3cf3584e51..466d31788b 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce2.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce2.q.out @@ -81,31 +81,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(CASE WHEN (ten is null) THEN (1) ELSE (null) END), compute_bit_vector(ten, 'hll'), min(one), max(one), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce3.q.out b/ql/src/test/results/clientpositive/llap/mapreduce3.q.out index f691d40eb6..03502eb7e9 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce3.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce3.q.out @@ -81,31 +81,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(CASE WHEN (ten is null) THEN (1) ELSE (null) END), compute_bit_vector(ten, 'hll'), min(one), max(one), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce4.q.out b/ql/src/test/results/clientpositive/llap/mapreduce4.q.out index 12882a8684..08c194f010 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce4.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce4.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(CASE WHEN (ten is null) THEN (1) ELSE (null) END), compute_bit_vector(ten, 'hll'), min(one), max(one), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce5.q.out b/ql/src/test/results/clientpositive/llap/mapreduce5.q.out index 4c014c3223..61ccf68b6c 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce5.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce5.q.out @@ -77,31 +77,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(CASE WHEN (ten is null) THEN (1) ELSE (null) END), compute_bit_vector(ten, 'hll'), min(one), max(one), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce6.q.out b/ql/src/test/results/clientpositive/llap/mapreduce6.q.out index cf3e69b2e0..4040762c44 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce6.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce6.q.out @@ -77,31 +77,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(CASE WHEN (ten is null) THEN (1) ELSE (null) END), compute_bit_vector(ten, 'hll'), min(one), max(one), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce7.q.out b/ql/src/test/results/clientpositive/llap/mapreduce7.q.out index 10d42cefee..361fee6775 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce7.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce7.q.out @@ -81,31 +81,35 @@ STAGE PLANS: outputColumnNames: k, v, key, ten, one, value Statistics: Num rows: 500 Data size: 140500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k, 'hll'), compute_stats(v, 'hll'), compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(k)), avg(COALESCE(length(k),0)), count(CASE WHEN (k is null) THEN (1) ELSE (null) END), compute_bit_vector(k, 'hll'), max(length(v)), avg(COALESCE(length(v),0)), count(CASE WHEN (v is null) THEN (1) ELSE (null) END), compute_bit_vector(v, 'hll'), min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(CASE WHEN (ten is null) THEN (1) ELSE (null) END), compute_bit_vector(ten, 'hll'), min(one), max(one), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: struct), _col22 (type: bigint), _col23 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), max(VALUE._col20), avg(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col20,0)) (type: bigint), COALESCE(_col21,0) (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce8.q.out b/ql/src/test/results/clientpositive/llap/mapreduce8.q.out index 1a38974d81..654a1ac4b3 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce8.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce8.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: k, v, key, ten, one, value Statistics: Num rows: 500 Data size: 140500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k, 'hll'), compute_stats(v, 'hll'), compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(k)), avg(COALESCE(length(k),0)), count(CASE WHEN (k is null) THEN (1) ELSE (null) END), compute_bit_vector(k, 'hll'), max(length(v)), avg(COALESCE(length(v),0)), count(CASE WHEN (v is null) THEN (1) ELSE (null) END), compute_bit_vector(v, 'hll'), min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(CASE WHEN (ten is null) THEN (1) ELSE (null) END), compute_bit_vector(ten, 'hll'), min(one), max(one), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: struct), _col22 (type: bigint), _col23 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), max(VALUE._col20), avg(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col20,0)) (type: bigint), COALESCE(_col21,0) (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/masking_mv.q.out b/ql/src/test/results/clientpositive/llap/masking_mv.q.out index 05a0f613a1..01a5b57b54 100644 --- a/ql/src/test/results/clientpositive/llap/masking_mv.q.out +++ b/ql/src/test/results/clientpositive/llap/masking_mv.q.out @@ -64,33 +64,37 @@ STAGE PLANS: outputColumnNames: col1 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -753,33 +757,37 @@ STAGE PLANS: outputColumnNames: col1 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out index 9af6567987..a9a3732415 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out @@ -85,31 +85,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -497,33 +501,37 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -842,31 +850,35 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3258 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1101,31 +1113,35 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 55 Data size: 9955 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1251,31 +1267,35 @@ STAGE PLANS: outputColumnNames: value, key, tes"t, te*#"s"t Statistics: Num rows: 55 Data size: 10835 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll'), compute_stats(tes"t, 'hll'), compute_stats(te*#"s"t, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(tes"t), max(tes"t), count(CASE WHEN (tes"t is null) THEN (1) ELSE (null) END), compute_bit_vector(tes"t, 'hll'), min(te*#"s"t), max(te*#"s"t), count(CASE WHEN (te*#"s"t is null) THEN (1) ELSE (null) END), compute_bit_vector(te*#"s"t, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out index 2d11b3fca1..b1c8cb6ec9 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out @@ -179,31 +179,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -638,31 +642,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out index 13d7f5a756..848a5485f4 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out @@ -183,31 +183,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -832,16 +836,16 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Filter Operator predicate: ((_col0 = _col4) and (_col1 = _col5)) (type: boolean) Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE @@ -860,17 +864,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1223,31 +1231,35 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1524,31 +1536,35 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1844,16 +1860,16 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Filter Operator predicate: ((_col0 = _col4) and (_col1 = _col5)) (type: boolean) Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE @@ -1872,17 +1888,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out index e2fff711cd..2234f6ac6a 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out @@ -300,31 +300,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -575,31 +579,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -806,31 +814,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1041,31 +1053,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out index 0a106ba0a3..52eebe598e 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out @@ -179,31 +179,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -638,31 +642,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out index 6cce2a45f7..842e946440 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out @@ -179,31 +179,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -762,31 +766,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out index 5961735f29..c59f072ad1 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out @@ -85,31 +85,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -363,31 +367,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -668,33 +676,37 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -889,31 +901,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1242,31 +1258,35 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3258 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out index 25c5aedc12..9213b8739c 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out @@ -86,35 +86,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 55 Data size: 10230 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') keys: col3 (type: double) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 27 Data size: 12744 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 27 Data size: 12744 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 27 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -985,37 +985,37 @@ STAGE PLANS: outputColumnNames: value, key, partkey Statistics: Num rows: 18 Data size: 3384 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 9 Data size: 4248 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 9 Data size: 4248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 9 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1447,35 +1447,35 @@ STAGE PLANS: outputColumnNames: value, key, partkey Statistics: Num rows: 18 Data size: 3402 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 9 Data size: 4248 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 9 Data size: 4248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 9 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1945,35 +1945,35 @@ STAGE PLANS: outputColumnNames: value, key, partkey Statistics: Num rows: 55 Data size: 10395 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 27 Data size: 12744 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 27 Data size: 12744 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 27 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2261,35 +2261,35 @@ STAGE PLANS: outputColumnNames: value, key, tes"t, te*#"s"t, partkey Statistics: Num rows: 55 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll'), compute_stats(tes"t, 'hll'), compute_stats(te*#"s"t, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(tes"t), max(tes"t), count(CASE WHEN (tes"t is null) THEN (1) ELSE (null) END), compute_bit_vector(tes"t, 'hll'), min(te*#"s"t), max(te*#"s"t), count(CASE WHEN (te*#"s"t is null) THEN (1) ELSE (null) END), compute_bit_vector(te*#"s"t, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 27 Data size: 46872 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 27 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 27 Data size: 46872 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 27 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 27 Data size: 47736 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 27 Data size: 18144 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 27 Data size: 47736 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 27 Data size: 28944 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 47736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 28944 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out index 85e22c791d..a71bba9dd0 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out @@ -62,19 +62,19 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll') keys: col2 (type: string) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -88,18 +88,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 27 Data size: 6777 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -653,19 +653,19 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: key (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 2871 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 9 Data size: 2871 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -679,18 +679,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 2259 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1033,19 +1033,19 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3258 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: key (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 2871 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 9 Data size: 2871 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -1057,18 +1057,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 2259 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out index da6e057636..d1838e4d63 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out @@ -62,19 +62,19 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll') keys: col2 (type: string) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -88,18 +88,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 27 Data size: 6777 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out index 26e3856761..4ea80e2af5 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out @@ -328,31 +328,35 @@ STAGE PLANS: outputColumnNames: quartile, total Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll') + aggregations: min(quartile), max(quartile), count(CASE WHEN (quartile is null) THEN (1) ELSE (null) END), compute_bit_vector(quartile, 'hll'), min(total), max(total), count(CASE WHEN (total is null) THEN (1) ELSE (null) END), compute_bit_vector(total, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), _col3 (type: binary), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DECIMAL' (type: string), _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -635,31 +639,35 @@ STAGE PLANS: outputColumnNames: quartile, total Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll') + aggregations: min(quartile), max(quartile), count(CASE WHEN (quartile is null) THEN (1) ELSE (null) END), compute_bit_vector(quartile, 'hll'), min(total), max(total), count(CASE WHEN (total is null) THEN (1) ELSE (null) END), compute_bit_vector(total, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), _col3 (type: binary), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DECIMAL' (type: string), _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -965,31 +973,35 @@ STAGE PLANS: outputColumnNames: total_views, quartile, program Statistics: Num rows: 6 Data size: 1266 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(total_views, 'hll'), compute_stats(quartile, 'hll'), compute_stats(program, 'hll') + aggregations: min(total_views), max(total_views), count(CASE WHEN (total_views is null) THEN (1) ELSE (null) END), compute_bit_vector(total_views, 'hll'), min(quartile), max(quartile), count(CASE WHEN (quartile is null) THEN (1) ELSE (null) END), compute_bit_vector(quartile, 'hll'), max(length(program)), avg(COALESCE(length(program),0)), count(CASE WHEN (program is null) THEN (1) ELSE (null) END), compute_bit_vector(program, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(12,1)), _col5 (type: decimal(12,1)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(12,1)), _col5 (type: decimal(12,1)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/merge1.q.out b/ql/src/test/results/clientpositive/llap/merge1.q.out index ef4378fff7..d2682bd840 100644 --- a/ql/src/test/results/clientpositive/llap/merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/merge1.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, val Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(val), max(val), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -554,33 +558,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -666,33 +674,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/merge2.q.out b/ql/src/test/results/clientpositive/llap/merge2.q.out index ee4681df80..02b77fa807 100644 --- a/ql/src/test/results/clientpositive/llap/merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/merge2.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, val Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(val), max(val), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -554,33 +558,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -666,33 +674,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/merge3.q.out b/ql/src/test/results/clientpositive/llap/merge3.q.out index 141e40891e..f59cfda48b 100644 --- a/ql/src/test/results/clientpositive/llap/merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/merge3.q.out @@ -118,19 +118,19 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -176,34 +176,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2375,12 +2379,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -2388,9 +2392,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -2477,30 +2481,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 1024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -4780,30 +4784,30 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 1024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/merge4.q.out b/ql/src/test/results/clientpositive/llap/merge4.q.out index e15a91c038..75875b5cf3 100644 --- a/ql/src/test/results/clientpositive/llap/merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/merge4.q.out @@ -56,37 +56,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1196,37 +1196,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2886,35 +2886,35 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 742 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 742 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 606 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition.q.out index 72db3e800c..894f10a310 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition.q.out @@ -88,19 +88,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 99 Data size: 49864 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 99 Data size: 136984 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 99 Data size: 95800 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 99 Data size: 136984 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 99 Data size: 95800 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -114,18 +114,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 49 Data size: 45000 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 49 Data size: 45000 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 49 Data size: 45000 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -758,11 +758,11 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -770,21 +770,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 49 Data size: 15664 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 49 Data size: 15664 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -1398,19 +1398,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 99 Data size: 49864 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 99 Data size: 136984 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 99 Data size: 95800 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 99 Data size: 136984 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 99 Data size: 95800 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -1424,18 +1424,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 49 Data size: 45000 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 49 Data size: 45000 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 49 Data size: 45000 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition2.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition2.q.out index 9bf582529b..0913b460ec 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition2.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition2.q.out @@ -107,19 +107,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 297 Data size: 148488 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 297 Data size: 409848 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 297 Data size: 286296 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 297 Data size: 409848 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 297 Data size: 286296 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -133,18 +133,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 148 Data size: 196480 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 148 Data size: 134912 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 148 Data size: 196480 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 148 Data size: 134912 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 148 Data size: 196480 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 148 Data size: 134912 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition3.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition3.q.out index d8b4c0dbd2..4c1e9f5ae7 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition3.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition3.q.out @@ -171,19 +171,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 594 Data size: 405536 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 594 Data size: 928256 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 594 Data size: 681152 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 594 Data size: 928256 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 594 Data size: 681152 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -197,18 +197,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 297 Data size: 448304 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 297 Data size: 324752 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 297 Data size: 448304 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 297 Data size: 324752 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 297 Data size: 448304 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 297 Data size: 324752 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out index 49b44ac297..5fb8dc803c 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out @@ -175,37 +175,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out index 1b2567894f..506f36cbb2 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out @@ -151,37 +151,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 618 Data size: 221244 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out b/ql/src/test/results/clientpositive/llap/mm_all.q.out index fd28d39af7..de5168153c 100644 --- a/ql/src/test/results/clientpositive/llap/mm_all.q.out +++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out @@ -100,37 +100,37 @@ STAGE PLANS: outputColumnNames: key, key_mm Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: key_mm (type: int) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 3 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 3 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/multi_insert.q.out b/ql/src/test/results/clientpositive/llap/multi_insert.q.out index 9eff316da0..39b49f2878 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert.q.out @@ -72,16 +72,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -102,48 +102,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -295,16 +303,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -325,48 +333,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -518,16 +534,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -548,48 +564,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -741,16 +765,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -771,48 +795,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -983,16 +1015,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1014,46 +1046,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1217,16 +1257,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1248,46 +1288,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1451,16 +1499,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1482,46 +1530,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1685,16 +1741,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1716,46 +1772,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1902,16 +1966,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1928,16 +1992,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -1965,16 +2029,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1991,48 +2055,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2207,16 +2279,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2233,16 +2305,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2270,16 +2342,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2296,48 +2368,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2512,16 +2592,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2538,16 +2618,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2575,16 +2655,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2601,48 +2681,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2817,16 +2905,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2843,16 +2931,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2880,16 +2968,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2906,48 +2994,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_gby.q.out index 57a94217b8..8717d4df2a 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_gby.q.out @@ -98,16 +98,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 105 Data size: 9555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 110 Data size: 9570 Basic stats: COMPLETE Column stats: COMPLETE @@ -134,46 +134,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 105 Data size: 9555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -370,16 +378,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 316 Data size: 28756 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (KEY._col0 > 450) (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE @@ -406,46 +414,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 105 Data size: 9555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_gby2.q.out index 42d167ec20..2596cde35a 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_gby2.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_gby2.q.out @@ -93,17 +93,21 @@ STAGE PLANS: outputColumnNames: count Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(count, 'hll') + aggregations: min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: percentile_approx(VALUE._col0, 0.5) mode: complete @@ -122,17 +126,21 @@ STAGE PLANS: outputColumnNames: percentile Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(percentile, 'hll') + aggregations: min(percentile), max(percentile), count(CASE WHEN (percentile is null) THEN (1) ELSE (null) END), compute_bit_vector(percentile, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_gby3.q.out index 0777184745..59507597c6 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_gby3.q.out @@ -129,31 +129,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(CASE WHEN (keyd is null) THEN (1) ELSE (null) END), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -180,31 +184,35 @@ STAGE PLANS: outputColumnNames: key, keyd, value Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(CASE WHEN (keyd is null) THEN (1) ELSE (null) END), compute_bit_vector(keyd, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -352,31 +360,35 @@ STAGE PLANS: outputColumnNames: key, keyd, value Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(CASE WHEN (keyd is null) THEN (1) ELSE (null) END), compute_bit_vector(keyd, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -403,31 +415,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(CASE WHEN (keyd is null) THEN (1) ELSE (null) END), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1888,31 +1904,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(CASE WHEN (keyd is null) THEN (1) ELSE (null) END), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1939,31 +1959,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(CASE WHEN (keyd is null) THEN (1) ELSE (null) END), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2134,31 +2158,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(CASE WHEN (keyd is null) THEN (1) ELSE (null) END), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2185,31 +2213,35 @@ STAGE PLANS: outputColumnNames: key, keyd, value Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(CASE WHEN (keyd is null) THEN (1) ELSE (null) END), compute_bit_vector(keyd, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -2236,31 +2268,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(CASE WHEN (keyd is null) THEN (1) ELSE (null) END), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_gby4.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_gby4.q.out index 554a174f5c..04ce2ffa82 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_gby4.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_gby4.q.out @@ -120,16 +120,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 106 Data size: 9646 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE @@ -156,16 +156,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 106 Data size: 9646 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (KEY._col0 > 490) (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE @@ -192,61 +192,73 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 106 Data size: 9646 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(count), max(count), count(CASE WHEN (count is null) THEN (1) ELSE (null) END), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out index 43e6b6c65d..d61732bb2b 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out @@ -98,16 +98,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 @@ -135,16 +135,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Lateral View Forward Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -171,16 +171,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 @@ -208,48 +208,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -526,31 +534,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -577,31 +589,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -828,31 +844,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -884,16 +904,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 542 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -920,46 +940,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 542 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -1269,31 +1297,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1320,31 +1352,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1371,31 +1407,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 5 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -1742,31 +1782,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1793,31 +1837,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1849,16 +1897,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 550 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE @@ -1885,46 +1933,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 550 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_mixed.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_mixed.q.out index 2bbf32e0a3..55530ab845 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_mixed.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_mixed.q.out @@ -125,16 +125,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -172,17 +172,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: @@ -218,32 +222,40 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_move_tasks_share_dependencies.q.out index 8a042db312..c83176f4d3 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_move_tasks_share_dependencies.q.out @@ -72,16 +72,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -102,48 +102,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -295,16 +303,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -325,48 +333,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -518,16 +534,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -548,48 +564,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -741,16 +765,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -771,48 +795,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -983,16 +1015,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1014,46 +1046,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1217,16 +1257,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1248,46 +1288,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1451,16 +1499,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1482,46 +1530,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1685,16 +1741,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1716,46 +1772,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1902,16 +1966,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1928,16 +1992,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -1965,16 +2029,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1991,48 +2055,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2207,16 +2279,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2233,16 +2305,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2270,16 +2342,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2296,48 +2368,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2512,16 +2592,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2538,16 +2618,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2575,16 +2655,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2601,48 +2681,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2817,16 +2905,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2843,16 +2931,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2880,16 +2968,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2906,48 +2994,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -4053,16 +4149,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -4083,16 +4179,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 39338 Basic stats: COMPLETE Column stats: COMPLETE @@ -4108,32 +4204,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -4363,16 +4467,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -4393,16 +4497,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 39338 Basic stats: COMPLETE Column stats: COMPLETE @@ -4418,32 +4522,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -4673,16 +4785,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -4703,16 +4815,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 39338 Basic stats: COMPLETE Column stats: COMPLETE @@ -4728,32 +4840,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -4983,16 +5103,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -5013,16 +5133,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 39338 Basic stats: COMPLETE Column stats: COMPLETE @@ -5038,32 +5158,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_union_src.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_union_src.q.out index 335af8f9c6..4ee19c291d 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_union_src.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_union_src.q.out @@ -161,17 +161,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 58 Data size: 10324 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -192,17 +196,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 58 Data size: 10324 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_with_join2.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_with_join2.q.out index 4af33dcfe8..2fb29c5958 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_with_join2.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_with_join2.q.out @@ -153,31 +153,35 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -299,31 +303,35 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -449,16 +457,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Filter Operator predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -479,46 +487,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -662,16 +678,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -692,46 +708,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -883,16 +907,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -913,46 +937,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1104,16 +1136,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -1134,46 +1166,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1337,16 +1377,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -1367,46 +1407,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1570,16 +1618,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -1600,46 +1648,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(CASE WHEN (ida is null) THEN (1) ELSE (null) END), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(CASE WHEN (vala is null) THEN (1) ELSE (null) END), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(CASE WHEN (idb is null) THEN (1) ELSE (null) END), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(CASE WHEN (valb is null) THEN (1) ELSE (null) END), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multigroupby_singlemr.q.out b/ql/src/test/results/clientpositive/llap/multigroupby_singlemr.q.out index 998772752c..34d234ccac 100644 --- a/ql/src/test/results/clientpositive/llap/multigroupby_singlemr.q.out +++ b/ql/src/test/results/clientpositive/llap/multigroupby_singlemr.q.out @@ -142,31 +142,35 @@ STAGE PLANS: outputColumnNames: d1, d2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 860 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 860 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 892 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 892 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -193,31 +197,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(CASE WHEN (d3 is null) THEN (1) ELSE (null) END), compute_bit_vector(d3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1332 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1332 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -362,31 +370,35 @@ STAGE PLANS: outputColumnNames: d1, d2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 860 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 860 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 892 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 892 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -413,31 +425,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(CASE WHEN (d3 is null) THEN (1) ELSE (null) END), compute_bit_vector(d3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1332 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1332 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -582,31 +598,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3, d4 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(CASE WHEN (d3 is null) THEN (1) ELSE (null) END), compute_bit_vector(d3, 'hll'), min(d4), max(d4), count(CASE WHEN (d4 is null) THEN (1) ELSE (null) END), compute_bit_vector(d4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -633,31 +653,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(CASE WHEN (d3 is null) THEN (1) ELSE (null) END), compute_bit_vector(d3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1336 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1336 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -778,16 +802,16 @@ STAGE PLANS: outputColumnNames: d1, d2, d3, d4 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(CASE WHEN (d3 is null) THEN (1) ELSE (null) END), compute_bit_vector(d3, 'hll'), min(d4), max(d4), count(CASE WHEN (d4 is null) THEN (1) ELSE (null) END), compute_bit_vector(d4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) @@ -811,46 +835,54 @@ STAGE PLANS: outputColumnNames: d1, d2, d3, d4 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(CASE WHEN (d3 is null) THEN (1) ELSE (null) END), compute_bit_vector(d3, 'hll'), min(d4), max(d4), count(CASE WHEN (d4 is null) THEN (1) ELSE (null) END), compute_bit_vector(d4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1021,31 +1053,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3, d4 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(CASE WHEN (d3 is null) THEN (1) ELSE (null) END), compute_bit_vector(d3, 'hll'), min(d4), max(d4), count(CASE WHEN (d4 is null) THEN (1) ELSE (null) END), compute_bit_vector(d4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 612 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1072,31 +1108,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(CASE WHEN (d3 is null) THEN (1) ELSE (null) END), compute_bit_vector(d3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1336 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1336 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1123,31 +1163,35 @@ STAGE PLANS: outputColumnNames: d1, d2 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + aggregations: min(d1), max(d1), count(CASE WHEN (d1 is null) THEN (1) ELSE (null) END), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(CASE WHEN (d2 is null) THEN (1) ELSE (null) END), compute_bit_vector(d2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 896 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 896 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/nonreserved_keywords_insert_into1.q.out b/ql/src/test/results/clientpositive/llap/nonreserved_keywords_insert_into1.q.out index dda8546f48..a0dfd5ab9f 100644 --- a/ql/src/test/results/clientpositive/llap/nonreserved_keywords_insert_into1.q.out +++ b/ql/src/test/results/clientpositive/llap/nonreserved_keywords_insert_into1.q.out @@ -80,31 +80,35 @@ STAGE PLANS: outputColumnNames: key, as Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(as)), avg(COALESCE(length(as),0)), count(CASE WHEN (as is null) THEN (1) ELSE (null) END), compute_bit_vector(as, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -220,31 +224,35 @@ STAGE PLANS: outputColumnNames: key, as Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(as)), avg(COALESCE(length(as),0)), count(CASE WHEN (as is null) THEN (1) ELSE (null) END), compute_bit_vector(as, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -369,31 +377,35 @@ STAGE PLANS: outputColumnNames: key, as Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(as)), avg(COALESCE(length(as),0)), count(CASE WHEN (as is null) THEN (1) ELSE (null) END), compute_bit_vector(as, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/notable_alias1.q.out b/ql/src/test/results/clientpositive/llap/notable_alias1.q.out index 8973d87b7a..416a22580e 100644 --- a/ql/src/test/results/clientpositive/llap/notable_alias1.q.out +++ b/ql/src/test/results/clientpositive/llap/notable_alias1.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: dummy, key, value Statistics: Num rows: 83 Data size: 8300 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(dummy, 'hll'), compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(dummy)), avg(COALESCE(length(dummy),0)), count(CASE WHEN (dummy is null) THEN (1) ELSE (null) END), compute_bit_vector(dummy, 'hll'), min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/notable_alias2.q.out b/ql/src/test/results/clientpositive/llap/notable_alias2.q.out index e1b0aa870e..6e3c080514 100644 --- a/ql/src/test/results/clientpositive/llap/notable_alias2.q.out +++ b/ql/src/test/results/clientpositive/llap/notable_alias2.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: dummy, key, value Statistics: Num rows: 83 Data size: 8300 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(dummy, 'hll'), compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(dummy)), avg(COALESCE(length(dummy),0)), count(CASE WHEN (dummy is null) THEN (1) ELSE (null) END), compute_bit_vector(dummy, 'hll'), min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_createas1.q.out b/ql/src/test/results/clientpositive/llap/orc_createas1.q.out index fe86c273b6..e2fbe135ec 100644 --- a/ql/src/test/results/clientpositive/llap/orc_createas1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_createas1.q.out @@ -95,33 +95,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -304,33 +308,37 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out index 3748086552..35ad825091 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out @@ -89,37 +89,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 148250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -227,37 +227,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 148250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -405,37 +405,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 148250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out index 82fcd88383..154ca614e9 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out @@ -82,19 +82,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -108,18 +108,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 148250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -230,19 +230,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -256,18 +256,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 148250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -423,19 +423,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -449,18 +449,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 148250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out index 4843ad1b3b..04f8b5eb6d 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out @@ -56,19 +56,19 @@ STAGE PLANS: outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: one (type: string), two (type: string), three (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 500 Data size: 422500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 500 Data size: 422500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -82,18 +82,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 500 Data size: 388500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out index 43704b23b5..9e5d03e627 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out index b62276e7f7..03c7bcae43 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out @@ -116,33 +116,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out index 799b7afd53..c0e6d6e4ab 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out @@ -70,33 +70,37 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(CASE WHEN (userid is null) THEN (1) ELSE (null) END), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(CASE WHEN (string1 is null) THEN (1) ELSE (null) END), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(CASE WHEN (subtype is null) THEN (1) ELSE (null) END), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(CASE WHEN (decimal1 is null) THEN (1) ELSE (null) END), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(CASE WHEN (ts is null) THEN (1) ELSE (null) END), compute_bit_vector(ts, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary), _col12 (type: decimal(38,0)), _col13 (type: decimal(38,0)), _col14 (type: bigint), _col15 (type: binary), _col16 (type: timestamp), _col17 (type: timestamp), _col18 (type: bigint), _col19 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DECIMAL' (type: string), _col12 (type: decimal(38,0)), _col13 (type: decimal(38,0)), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'TIMESTAMP' (type: string), _col16 (type: timestamp), _col17 (type: timestamp), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -206,33 +210,37 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(CASE WHEN (userid is null) THEN (1) ELSE (null) END), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(CASE WHEN (string1 is null) THEN (1) ELSE (null) END), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(CASE WHEN (subtype is null) THEN (1) ELSE (null) END), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(CASE WHEN (decimal1 is null) THEN (1) ELSE (null) END), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(CASE WHEN (ts is null) THEN (1) ELSE (null) END), compute_bit_vector(ts, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary), _col12 (type: decimal(38,0)), _col13 (type: decimal(38,0)), _col14 (type: bigint), _col15 (type: binary), _col16 (type: timestamp), _col17 (type: timestamp), _col18 (type: bigint), _col19 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DECIMAL' (type: string), _col12 (type: decimal(38,0)), _col13 (type: decimal(38,0)), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'TIMESTAMP' (type: string), _col16 (type: timestamp), _col17 (type: timestamp), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out index 0b9924680a..0b845678b2 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out @@ -70,11 +70,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(CASE WHEN (userid is null) THEN (1) ELSE (null) END), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(CASE WHEN (string1 is null) THEN (1) ELSE (null) END), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(CASE WHEN (subtype is null) THEN (1) ELSE (null) END), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(CASE WHEN (decimal1 is null) THEN (1) ELSE (null) END), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(CASE WHEN (ts is null) THEN (1) ELSE (null) END), compute_bit_vector(ts, 'hll') keys: year (type: string), hour (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) @@ -82,21 +82,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), _col16 (type: bigint), _col17 (type: binary), _col18 (type: timestamp), _col19 (type: timestamp), _col20 (type: bigint), _col21 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: 'LONG' (type: string), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DECIMAL' (type: string), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), 'TIMESTAMP' (type: string), _col18 (type: timestamp), _col19 (type: timestamp), _col20 (type: bigint), COALESCE(ndv_compute_bit_vector(_col21),0) (type: bigint), _col21 (type: binary), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -259,11 +259,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(CASE WHEN (userid is null) THEN (1) ELSE (null) END), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(CASE WHEN (string1 is null) THEN (1) ELSE (null) END), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(CASE WHEN (subtype is null) THEN (1) ELSE (null) END), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(CASE WHEN (decimal1 is null) THEN (1) ELSE (null) END), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(CASE WHEN (ts is null) THEN (1) ELSE (null) END), compute_bit_vector(ts, 'hll') keys: year (type: string), hour (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) @@ -271,21 +271,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), _col16 (type: bigint), _col17 (type: binary), _col18 (type: timestamp), _col19 (type: timestamp), _col20 (type: bigint), _col21 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: 'LONG' (type: string), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DECIMAL' (type: string), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), _col16 (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), 'TIMESTAMP' (type: string), _col18 (type: timestamp), _col19 (type: timestamp), _col20 (type: bigint), COALESCE(ndv_compute_bit_vector(_col21),0) (type: bigint), _col21 (type: binary), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out index 5fe669389e..338fce8988 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out @@ -58,11 +58,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, st Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(CASE WHEN (userid is null) THEN (1) ELSE (null) END), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(CASE WHEN (string1 is null) THEN (1) ELSE (null) END), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(CASE WHEN (subtype is null) THEN (1) ELSE (null) END), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(CASE WHEN (decimal1 is null) THEN (1) ELSE (null) END), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(CASE WHEN (ts is null) THEN (1) ELSE (null) END), compute_bit_vector(ts, 'hll') keys: st (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) @@ -70,7 +70,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), _col15 (type: bigint), _col16 (type: binary), _col17 (type: timestamp), _col18 (type: timestamp), _col19 (type: bigint), _col20 (type: binary) Reduce Output Operator key expressions: _col5 (type: double) null sort order: a @@ -84,14 +84,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DECIMAL' (type: string), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'TIMESTAMP' (type: string), _col17 (type: timestamp), _col18 (type: timestamp), _col19 (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -295,11 +295,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, st Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(CASE WHEN (userid is null) THEN (1) ELSE (null) END), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(CASE WHEN (string1 is null) THEN (1) ELSE (null) END), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(CASE WHEN (subtype is null) THEN (1) ELSE (null) END), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(CASE WHEN (decimal1 is null) THEN (1) ELSE (null) END), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(CASE WHEN (ts is null) THEN (1) ELSE (null) END), compute_bit_vector(ts, 'hll') keys: st (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) @@ -307,7 +307,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), _col15 (type: bigint), _col16 (type: binary), _col17 (type: timestamp), _col18 (type: timestamp), _col19 (type: bigint), _col20 (type: binary) Reduce Output Operator key expressions: _col5 (type: double) null sort order: a @@ -321,14 +321,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DECIMAL' (type: string), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'TIMESTAMP' (type: string), _col17 (type: timestamp), _col18 (type: timestamp), _col19 (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out index 3215f09ac8..3434bc4fdd 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out @@ -82,19 +82,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -108,18 +108,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 148250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -230,19 +230,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -256,18 +256,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 148250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -423,19 +423,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 165250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -449,18 +449,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 148250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out index 83753ee1eb..b7a62e8506 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out @@ -70,33 +70,37 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(CASE WHEN (userid is null) THEN (1) ELSE (null) END), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(CASE WHEN (string1 is null) THEN (1) ELSE (null) END), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(CASE WHEN (subtype is null) THEN (1) ELSE (null) END), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(CASE WHEN (decimal1 is null) THEN (1) ELSE (null) END), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(CASE WHEN (ts is null) THEN (1) ELSE (null) END), compute_bit_vector(ts, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary), _col12 (type: decimal(38,0)), _col13 (type: decimal(38,0)), _col14 (type: bigint), _col15 (type: binary), _col16 (type: timestamp), _col17 (type: timestamp), _col18 (type: bigint), _col19 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DECIMAL' (type: string), _col12 (type: decimal(38,0)), _col13 (type: decimal(38,0)), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'TIMESTAMP' (type: string), _col16 (type: timestamp), _col17 (type: timestamp), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out index 54dbc75d86..78f5b1426f 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out @@ -58,11 +58,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, st Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(CASE WHEN (userid is null) THEN (1) ELSE (null) END), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(CASE WHEN (string1 is null) THEN (1) ELSE (null) END), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(CASE WHEN (subtype is null) THEN (1) ELSE (null) END), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(CASE WHEN (decimal1 is null) THEN (1) ELSE (null) END), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(CASE WHEN (ts is null) THEN (1) ELSE (null) END), compute_bit_vector(ts, 'hll') keys: st (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) @@ -70,7 +70,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), _col15 (type: bigint), _col16 (type: binary), _col17 (type: timestamp), _col18 (type: timestamp), _col19 (type: bigint), _col20 (type: binary) Reduce Output Operator key expressions: _col5 (type: double) null sort order: a @@ -84,14 +84,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DECIMAL' (type: string), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'TIMESTAMP' (type: string), _col17 (type: timestamp), _col18 (type: timestamp), _col19 (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/parallel.q.out b/ql/src/test/results/clientpositive/llap/parallel.q.out index 8548d70937..ba5d403a91 100644 --- a/ql/src/test/results/clientpositive/llap/parallel.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel.q.out @@ -109,16 +109,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -137,46 +137,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out b/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out index 26aec2577f..f437e01ffe 100644 --- a/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out @@ -109,16 +109,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -137,46 +137,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/parallel_join1.q.out b/ql/src/test/results/clientpositive/llap/parallel_join1.q.out index 4a54f24f4e..fc8437ca79 100644 --- a/ql/src/test/results/clientpositive/llap/parallel_join1.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel_join1.q.out @@ -104,31 +104,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/parallel_orderby.q.out b/ql/src/test/results/clientpositive/llap/parallel_orderby.q.out index f49f8f9503..fd11b5aeb9 100644 --- a/ql/src/test/results/clientpositive/llap/parallel_orderby.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel_orderby.q.out @@ -85,17 +85,21 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/partial_column_stats.q.out b/ql/src/test/results/clientpositive/llap/partial_column_stats.q.out index 00dc5406d7..219c580255 100644 --- a/ql/src/test/results/clientpositive/llap/partial_column_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/partial_column_stats.q.out @@ -38,33 +38,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 580 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/partition_ctas.q.out b/ql/src/test/results/clientpositive/llap/partition_ctas.q.out index 1700007677..c7185a252e 100644 --- a/ql/src/test/results/clientpositive/llap/partition_ctas.q.out +++ b/ql/src/test/results/clientpositive/llap/partition_ctas.q.out @@ -47,19 +47,19 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll') keys: col2 (type: string) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -73,18 +73,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 27 Data size: 6777 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/pcr.q.out b/ql/src/test/results/clientpositive/llap/pcr.q.out index d10364c727..c52bd1549b 100644 --- a/ql/src/test/results/clientpositive/llap/pcr.q.out +++ b/ql/src/test/results/clientpositive/llap/pcr.q.out @@ -3513,19 +3513,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Select Operator expressions: key (type: int), value (type: string) @@ -3562,19 +3562,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -3624,67 +3624,75 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection @@ -3851,19 +3859,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Filter Operator isSamplingPred: false @@ -3904,19 +3912,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -3966,67 +3974,75 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/ppd_constant_expr.q.out b/ql/src/test/results/clientpositive/llap/ppd_constant_expr.q.out index d527113b73..aa4ea99d7c 100644 --- a/ql/src/test/results/clientpositive/llap/ppd_constant_expr.q.out +++ b/ql/src/test/results/clientpositive/llap/ppd_constant_expr.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3 Statistics: Num rows: 25 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -197,33 +201,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3 Statistics: Num rows: 25 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/llap/ppd_multi_insert.q.out index 6f0c154f6a..cd6c104f5d 100644 --- a/ql/src/test/results/clientpositive/llap/ppd_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/ppd_multi_insert.q.out @@ -142,16 +142,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 263 Data size: 24985 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 87 Data size: 15486 Basic stats: COMPLETE Column stats: COMPLETE @@ -172,16 +172,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 87 Data size: 8265 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9885057 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 87 Data size: 15486 Basic stats: COMPLETE Column stats: COMPLETE @@ -202,19 +202,19 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 87 Data size: 16008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: '2008-04-08' (type: string), '12' (type: string) minReductionHashAggr: 0.9885057 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-04-08' (type: string), '12' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary) Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 263 Data size: 46814 Basic stats: COMPLETE Column stats: COMPLETE @@ -233,48 +233,56 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: '2008-04-08' (type: string), '12' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1555,16 +1563,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 263 Data size: 24985 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 87 Data size: 15486 Basic stats: COMPLETE Column stats: COMPLETE @@ -1585,16 +1593,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 87 Data size: 8265 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9885057 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 87 Data size: 15486 Basic stats: COMPLETE Column stats: COMPLETE @@ -1615,19 +1623,19 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 87 Data size: 16008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: '2008-04-08' (type: string), '12' (type: string) minReductionHashAggr: 0.9885057 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-04-08' (type: string), '12' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: binary) Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 263 Data size: 46814 Basic stats: COMPLETE Column stats: COMPLETE @@ -1646,48 +1654,56 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: '2008-04-08' (type: string), '12' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/ptf.q.out b/ql/src/test/results/clientpositive/llap/ptf.q.out index cd10396a7d..1ec4c6ac3d 100644 --- a/ql/src/test/results/clientpositive/llap/ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/ptf.q.out @@ -3200,31 +3200,35 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, r, dr, s Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(CASE WHEN (p_mfgr is null) THEN (1) ELSE (null) END), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(CASE WHEN (p_name is null) THEN (1) ELSE (null) END), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(CASE WHEN (p_size is null) THEN (1) ELSE (null) END), compute_bit_vector(p_size, 'hll'), min(r), max(r), count(CASE WHEN (r is null) THEN (1) ELSE (null) END), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(CASE WHEN (dr is null) THEN (1) ELSE (null) END), compute_bit_vector(dr, 'hll'), min(s), max(s), count(CASE WHEN (s is null) THEN (1) ELSE (null) END), compute_bit_vector(s, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), _col23 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), min(VALUE._col20), max(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -3328,31 +3332,35 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(s2, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(fv1, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(CASE WHEN (p_mfgr is null) THEN (1) ELSE (null) END), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(CASE WHEN (p_name is null) THEN (1) ELSE (null) END), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(CASE WHEN (p_size is null) THEN (1) ELSE (null) END), compute_bit_vector(p_size, 'hll'), min(s2), max(s2), count(CASE WHEN (s2 is null) THEN (1) ELSE (null) END), compute_bit_vector(s2, 'hll'), min(r), max(r), count(CASE WHEN (r is null) THEN (1) ELSE (null) END), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(CASE WHEN (dr is null) THEN (1) ELSE (null) END), compute_bit_vector(dr, 'hll'), min(cud), max(cud), count(CASE WHEN (cud is null) THEN (1) ELSE (null) END), compute_bit_vector(cud, 'hll'), min(fv1), max(fv1), count(CASE WHEN (fv1 is null) THEN (1) ELSE (null) END), compute_bit_vector(fv1, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: int), _col22 (type: bigint), _col23 (type: binary), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), _col27 (type: binary), _col28 (type: int), _col29 (type: int), _col30 (type: bigint), _col31 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), min(VALUE._col20), max(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23), min(VALUE._col24), max(VALUE._col25), count(VALUE._col26), compute_bit_vector(VALUE._col27), min(VALUE._col28), max(VALUE._col29), count(VALUE._col30), compute_bit_vector(VALUE._col31) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'LONG' (type: string), UDFToLong(_col20) (type: bigint), UDFToLong(_col21) (type: bigint), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'LONG' (type: string), UDFToLong(_col28) (type: bigint), UDFToLong(_col29) (type: bigint), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47 + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/quote1.q.out b/ql/src/test/results/clientpositive/llap/quote1.q.out index c3867ffa34..8d20e6922b 100644 --- a/ql/src/test/results/clientpositive/llap/quote1.q.out +++ b/ql/src/test/results/clientpositive/llap/quote1.q.out @@ -58,37 +58,37 @@ STAGE PLANS: outputColumnNames: location, type, table Statistics: Num rows: 55 Data size: 10395 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(location, 'hll'), compute_stats(type, 'hll') + aggregations: min(location), max(location), count(CASE WHEN (location is null) THEN (1) ELSE (null) END), compute_bit_vector(location, 'hll'), max(length(type)), avg(COALESCE(length(type),0)), count(CASE WHEN (type is null) THEN (1) ELSE (null) END), compute_bit_vector(type, 'hll') keys: table (type: string) minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 418 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/rand_partitionpruner2.q.out b/ql/src/test/results/clientpositive/llap/rand_partitionpruner2.q.out index 13249e91f8..688898d5bd 100644 --- a/ql/src/test/results/clientpositive/llap/rand_partitionpruner2.q.out +++ b/ql/src/test/results/clientpositive/llap/rand_partitionpruner2.q.out @@ -84,19 +84,19 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 333 Data size: 151848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(CASE WHEN (hr is null) THEN (1) ELSE (null) END), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(CASE WHEN (ds is null) THEN (1) ELSE (null) END), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -185,34 +185,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out b/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out index a680358a46..cf57a7a2bb 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out @@ -99,33 +99,37 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out index 59f4acd59b..11e78ace87 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out @@ -56,19 +56,19 @@ STAGE PLANS: outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: one (type: string), two (type: string), three (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 500 Data size: 422500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 500 Data size: 422500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -82,18 +82,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 500 Data size: 388500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), _col5 (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out index 24af5c74d5..bc2afd7c1a 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out index 5737ac16b1..8f26261cc4 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/rcfile_null_value.q.out b/ql/src/test/results/clientpositive/llap/rcfile_null_value.q.out index 52506bfb8c..f4a41e46d9 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_null_value.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_null_value.q.out @@ -178,31 +178,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out index 33ebf0cb14..5761ead50a 100644 --- a/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out @@ -136,34 +136,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 - directory: hdfs://### HDFS PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Stats Publishing Key Prefix: hdfs://### HDFS PATH ### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 + directory: hdfs://### HDFS PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + Stats Publishing Key Prefix: hdfs://### HDFS PATH ### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -395,12 +399,12 @@ STAGE PLANS: outputColumnNames: aid, bid, t, ctime, etime, l, et, ds Statistics: Num rows: 1 Data size: 462 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(aid, 'hll'), compute_stats(bid, 'hll'), compute_stats(t, 'hll'), compute_stats(ctime, 'hll'), compute_stats(etime, 'hll'), compute_stats(l, 'hll'), compute_stats(et, 'hll') + aggregations: max(length(aid)), avg(COALESCE(length(aid),0)), count(CASE WHEN (aid is null) THEN (1) ELSE (null) END), compute_bit_vector(aid, 'hll'), max(length(bid)), avg(COALESCE(length(bid),0)), count(CASE WHEN (bid is null) THEN (1) ELSE (null) END), compute_bit_vector(bid, 'hll'), min(t), max(t), count(CASE WHEN (t is null) THEN (1) ELSE (null) END), compute_bit_vector(t, 'hll'), max(length(ctime)), avg(COALESCE(length(ctime),0)), count(CASE WHEN (ctime is null) THEN (1) ELSE (null) END), compute_bit_vector(ctime, 'hll'), min(etime), max(etime), count(CASE WHEN (etime is null) THEN (1) ELSE (null) END), compute_bit_vector(etime, 'hll'), max(length(l)), avg(COALESCE(length(l),0)), count(CASE WHEN (l is null) THEN (1) ELSE (null) END), compute_bit_vector(l, 'hll'), max(length(et)), avg(COALESCE(length(et),0)), count(CASE WHEN (et is null) THEN (1) ELSE (null) END), compute_bit_vector(et, 'hll') keys: '2010-03-29' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3142 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28 + Statistics: Num rows: 1 Data size: 1582 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: '2010-03-29' (type: string) @@ -408,39 +412,39 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: '2010-03-29' (type: string) - Statistics: Num rows: 1 Data size: 3142 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1582 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary), _col17 (type: bigint), _col18 (type: bigint), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: struct), _col23 (type: bigint), _col24 (type: binary), _col25 (type: int), _col26 (type: struct), _col27 (type: bigint), _col28 (type: binary) auto parallelism: true Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), max(VALUE._col20), avg(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23), max(VALUE._col24), avg(VALUE._col25), count(VALUE._col26), compute_bit_vector(VALUE._col27) keys: '2010-03-29' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28 + Statistics: Num rows: 1 Data size: 1242 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), '2010-03-29' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), _col17 (type: bigint), _col18 (type: bigint), _col19 (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col21,0)) (type: bigint), COALESCE(_col22,0) (type: double), _col23 (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col25,0)) (type: bigint), COALESCE(_col26,0) (type: double), _col27 (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), '2010-03-29' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 directory: hdfs://### HDFS PATH ### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: PARTIAL Stats Publishing Key Prefix: hdfs://### HDFS PATH ### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 - columns.types struct:struct:struct:struct:struct:struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29,_col30,_col31,_col32,_col33,_col34,_col35,_col36,_col37,_col38,_col39,_col40,_col41,_col42 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/sample1.q.out b/ql/src/test/results/clientpositive/llap/sample1.q.out index b257edc45c..fa31e51ed9 100644 --- a/ql/src/test/results/clientpositive/llap/sample1.q.out +++ b/ql/src/test/results/clientpositive/llap/sample1.q.out @@ -82,19 +82,19 @@ STAGE PLANS: outputColumnNames: key, value, dt, hr Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(dt, 'hll'), compute_stats(hr, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(dt)), avg(COALESCE(length(dt),0)), count(CASE WHEN (dt is null) THEN (1) ELSE (null) END), compute_bit_vector(dt, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(CASE WHEN (hr is null) THEN (1) ELSE (null) END), compute_bit_vector(hr, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -145,34 +145,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sample5.q.out b/ql/src/test/results/clientpositive/llap/sample5.q.out index e870e8c9b2..157adc5251 100644 --- a/ql/src/test/results/clientpositive/llap/sample5.q.out +++ b/ql/src/test/results/clientpositive/llap/sample5.q.out @@ -79,19 +79,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -140,34 +140,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sample6.q.out b/ql/src/test/results/clientpositive/llap/sample6.q.out index 71141024c2..2f1a7d7525 100644 --- a/ql/src/test/results/clientpositive/llap/sample6.q.out +++ b/ql/src/test/results/clientpositive/llap/sample6.q.out @@ -78,19 +78,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -139,34 +139,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sample7.q.out b/ql/src/test/results/clientpositive/llap/sample7.q.out index e217faa782..d2f6ab45e0 100644 --- a/ql/src/test/results/clientpositive/llap/sample7.q.out +++ b/ql/src/test/results/clientpositive/llap/sample7.q.out @@ -80,19 +80,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 400 Data size: 38000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -141,34 +141,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index 10e3ba040a..f89a6e2976 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -3567,31 +3567,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3918,31 +3922,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out index fcae6cae9a..304cb5f386 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out @@ -494,31 +494,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4, col5, col6 Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll'), compute_stats(col6, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll'), max(length(col4)), avg(COALESCE(length(col4),0)), count(CASE WHEN (col4 is null) THEN (1) ELSE (null) END), compute_bit_vector(col4, 'hll'), min(col5), max(col5), count(CASE WHEN (col5 is null) THEN (1) ELSE (null) END), compute_bit_vector(col5, 'hll'), min(col6), max(col6), count(CASE WHEN (col6 is null) THEN (1) ELSE (null) END), compute_bit_vector(col6, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1136 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1136 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), _col19 (type: binary), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), _col23 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), min(VALUE._col20), max(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1136 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'DOUBLE' (type: string), _col16 (type: double), _col17 (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1136 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/show_functions.q.out b/ql/src/test/results/clientpositive/llap/show_functions.q.out index 4b38cfb604..834dbc9d6c 100644 --- a/ql/src/test/results/clientpositive/llap/show_functions.q.out +++ b/ql/src/test/results/clientpositive/llap/show_functions.q.out @@ -72,6 +72,7 @@ chr coalesce collect_list collect_set +compute_bit_vector compute_stats concat concat_ws @@ -259,6 +260,7 @@ month months_between murmur_hash named_struct +ndv_compute_bit_vector negative next_day ngrams @@ -424,6 +426,7 @@ chr coalesce collect_list collect_set +compute_bit_vector compute_stats concat concat_ws @@ -580,6 +583,7 @@ chr coalesce collect_list collect_set +compute_bit_vector compute_stats concat concat_ws @@ -767,6 +771,7 @@ month months_between murmur_hash named_struct +ndv_compute_bit_vector negative next_day ngrams diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out index 175219991c..fcbc3598d8 100644 --- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out +++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out @@ -405,31 +405,35 @@ STAGE PLANS: outputColumnNames: category, _c1 Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(category, 'hll'), compute_stats(_c1, 'hll') + aggregations: max(length(category)), avg(COALESCE(length(category),0)), count(CASE WHEN (category is null) THEN (1) ELSE (null) END), compute_bit_vector(category, 'hll'), max(length(_c1)), avg(COALESCE(length(_c1),0)), count(CASE WHEN (_c1 is null) THEN (1) ELSE (null) END) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'BINARY' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out index d9b72b079b..eda5453ab2 100644 --- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out +++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out @@ -416,31 +416,35 @@ STAGE PLANS: outputColumnNames: category, _c1, _c2 Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(category, 'hll'), compute_stats(_c1, 'hll'), compute_stats(_c2, 'hll') + aggregations: max(length(category)), avg(COALESCE(length(category),0)), count(CASE WHEN (category is null) THEN (1) ELSE (null) END), compute_bit_vector(category, 'hll'), max(length(_c1)), avg(COALESCE(length(_c1),0)), count(CASE WHEN (_c1 is null) THEN (1) ELSE (null) END), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), min(VALUE._col7), max(VALUE._col8), count(VALUE._col9), compute_bit_vector(VALUE._col10) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'BINARY' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), 'LONG' (type: string), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out index 126aee61cb..ee4d09389b 100644 --- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out +++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out @@ -113,31 +113,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -279,31 +283,35 @@ STAGE PLANS: outputColumnNames: _c0, category, _c2 Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(_c0, 'hll'), compute_stats(category, 'hll'), compute_stats(_c2, 'hll') + aggregations: max(length(_c0)), avg(COALESCE(length(_c0),0)), count(CASE WHEN (_c0 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c0, 'hll'), max(length(category)), avg(COALESCE(length(category),0)), count(CASE WHEN (category is null) THEN (1) ELSE (null) END), compute_bit_vector(category, 'hll'), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/skewjoin.q.out b/ql/src/test/results/clientpositive/llap/skewjoin.q.out index c8b827d50b..d2a54ff27c 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin.q.out @@ -168,31 +168,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/skewjoin_noskew.q.out b/ql/src/test/results/clientpositive/llap/skewjoin_noskew.q.out index 8929b61d53..eba3e088b3 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin_noskew.q.out @@ -117,31 +117,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 30 Data size: 5340 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.96666664 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/skewjoin_onesideskew.q.out b/ql/src/test/results/clientpositive/llap/skewjoin_onesideskew.q.out index c25ab387b4..4f2329c7cf 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin_onesideskew.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin_onesideskew.q.out @@ -169,31 +169,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 3 Data size: 522 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out index 80a9888ca8..e1aa912403 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out @@ -290,35 +290,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out index c1efe543af..1535e4bfab 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out @@ -129,32 +129,36 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 825 Data size: 156750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(CASE WHEN (v1 is null) THEN (1) ELSE (null) END), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(CASE WHEN (v2 is null) THEN (1) ELSE (null) END), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1336,32 +1340,36 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 825 Data size: 156750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(CASE WHEN (v1 is null) THEN (1) ELSE (null) END), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(CASE WHEN (v2 is null) THEN (1) ELSE (null) END), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2559,32 +2567,36 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(CASE WHEN (v1 is null) THEN (1) ELSE (null) END), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(CASE WHEN (v2 is null) THEN (1) ELSE (null) END), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2698,32 +2710,36 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(CASE WHEN (v1 is null) THEN (1) ELSE (null) END), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(CASE WHEN (v2 is null) THEN (1) ELSE (null) END), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 8384a6c851..1a18898e52 100644 --- a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -215,31 +215,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -460,31 +464,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out index b857f36211..44d3c1da1c 100644 --- a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out @@ -273,31 +273,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -825,31 +829,35 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1129,31 +1137,35 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1433,31 +1445,35 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1737,31 +1753,35 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2108,31 +2128,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -2441,31 +2465,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/stats0.q.out b/ql/src/test/results/clientpositive/llap/stats0.q.out index 2b08b61c17..be9c2d859e 100644 --- a/ql/src/test/results/clientpositive/llap/stats0.q.out +++ b/ql/src/test/results/clientpositive/llap/stats0.q.out @@ -75,19 +75,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -133,34 +133,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -782,37 +786,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 131500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1493,19 +1497,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1551,34 +1555,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2200,37 +2208,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 131500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/stats1.q.out b/ql/src/test/results/clientpositive/llap/stats1.q.out index e1c38ed13a..0b9b17e8ff 100644 --- a/ql/src/test/results/clientpositive/llap/stats1.q.out +++ b/ql/src/test/results/clientpositive/llap/stats1.q.out @@ -84,16 +84,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -121,31 +121,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/stats10.q.out b/ql/src/test/results/clientpositive/llap/stats10.q.out index 5e0fb15365..5e3b8399ba 100644 --- a/ql/src/test/results/clientpositive/llap/stats10.q.out +++ b/ql/src/test/results/clientpositive/llap/stats10.q.out @@ -71,35 +71,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/stats11.q.out b/ql/src/test/results/clientpositive/llap/stats11.q.out index b69b8e2c79..da94340391 100644 --- a/ql/src/test/results/clientpositive/llap/stats11.q.out +++ b/ql/src/test/results/clientpositive/llap/stats11.q.out @@ -518,53 +518,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -932,53 +936,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 696 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/stats4.q.out b/ql/src/test/results/clientpositive/llap/stats4.q.out index 2a6ca49af9..806f7c9e60 100644 --- a/ql/src/test/results/clientpositive/llap/stats4.q.out +++ b/ql/src/test/results/clientpositive/llap/stats4.q.out @@ -96,19 +96,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE @@ -129,37 +129,37 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: '2008-12-31' (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-12-31' (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -168,18 +168,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: '2008-12-31' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/stats_empty_dyn_part.q.out b/ql/src/test/results/clientpositive/llap/stats_empty_dyn_part.q.out index d9bbe8a572..038341c681 100644 --- a/ql/src/test/results/clientpositive/llap/stats_empty_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_empty_dyn_part.q.out @@ -53,37 +53,37 @@ STAGE PLANS: outputColumnNames: key, part Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/stats_nonpart.q.out b/ql/src/test/results/clientpositive/llap/stats_nonpart.q.out index 29eab6390d..1fe665754a 100644 --- a/ql/src/test/results/clientpositive/llap/stats_nonpart.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_nonpart.q.out @@ -155,33 +155,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/stats_sizebug.q.out b/ql/src/test/results/clientpositive/llap/stats_sizebug.q.out index 4c78c23c4b..7a0ecd2841 100644 --- a/ql/src/test/results/clientpositive/llap/stats_sizebug.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_sizebug.q.out @@ -91,33 +91,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/subquery_multiinsert.q.out index 5c5e5c3da9..1dae93554c 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multiinsert.q.out @@ -231,17 +231,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -266,31 +270,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 131 Data size: 23318 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: @@ -628,16 +636,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 131 Data size: 23318 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Map Join Operator condition map: Left Semi Join 0 to 1 @@ -741,17 +749,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -772,17 +784,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/tablevalues.q.out b/ql/src/test/results/clientpositive/llap/tablevalues.q.out index b936fa3e4b..99cc493fab 100644 --- a/ql/src/test/results/clientpositive/llap/tablevalues.q.out +++ b/ql/src/test/results/clientpositive/llap/tablevalues.q.out @@ -79,33 +79,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out index e52adde9cd..2f477e02b9 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out @@ -186,33 +186,37 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: float), _col9 (type: float), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col8) (type: double), UDFToDouble(_col9) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -258,19 +262,19 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(CASE WHEN (sourceip is null) THEN (1) ELSE (null) END), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(CASE WHEN (avgtimeonsite is null) THEN (1) ELSE (null) END), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(CASE WHEN (adrevenue is null) THEN (1) ELSE (null) END), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: float), _col9 (type: float), _col10 (type: bigint), _col11 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -320,34 +324,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col8) (type: double), UDFToDouble(_col9) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:double:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -542,33 +550,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, e Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(b), max(b), count(CASE WHEN (b is null) THEN (1) ELSE (null) END), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), count(CASE WHEN (d is true) THEN (1) ELSE (null) END), count(CASE WHEN (d is false) THEN (1) ELSE (null) END), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), max(length(e)), avg(COALESCE(length(e),0)), count(CASE WHEN (e is null) THEN (1) ELSE (null) END) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: int), _col16 (type: struct), _col17 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), count(VALUE._col12), count(VALUE._col13), count(VALUE._col14), max(VALUE._col15), avg(VALUE._col16), count(VALUE._col17) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'BOOLEAN' (type: string), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), 'BINARY' (type: string), UDFToLong(COALESCE(_col15,0)) (type: bigint), COALESCE(_col16,0) (type: double), _col17 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/temp_table_insert1_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/llap/temp_table_insert1_overwrite_partitions.q.out index 0a92907e29..62f65da195 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_insert1_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_insert1_overwrite_partitions.q.out @@ -106,11 +106,11 @@ STAGE PLANS: outputColumnNames: one, two, ds, hr Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(CASE WHEN (two is null) THEN (1) ELSE (null) END), compute_bit_vector(two, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -118,19 +118,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -450,11 +450,11 @@ STAGE PLANS: outputColumnNames: one, two, ds, hr Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(CASE WHEN (two is null) THEN (1) ELSE (null) END), compute_bit_vector(two, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -462,19 +462,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/temp_table_insert2_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/llap/temp_table_insert2_overwrite_partitions.q.out index e56f9ebbea..12b470c569 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_insert2_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_insert2_overwrite_partitions.q.out @@ -117,11 +117,11 @@ STAGE PLANS: outputColumnNames: one, two, ds Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(CASE WHEN (two is null) THEN (1) ELSE (null) END), compute_bit_vector(two, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -129,19 +129,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -277,11 +277,11 @@ STAGE PLANS: outputColumnNames: one, two, ds Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(CASE WHEN (one is null) THEN (1) ELSE (null) END), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(CASE WHEN (two is null) THEN (1) ELSE (null) END), compute_bit_vector(two, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -289,19 +289,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/temp_table_merge_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/temp_table_merge_dynamic_partition.q.out index abb4f779eb..b0935e7e40 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_merge_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_merge_dynamic_partition.q.out @@ -715,11 +715,11 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -727,21 +727,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 49 Data size: 15664 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), _col4 (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), _col8 (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 49 Data size: 15664 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/tez_dml.q.out b/ql/src/test/results/clientpositive/llap/tez_dml.q.out index bdc6ee6f7b..53f1b8d864 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dml.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dml.q.out @@ -84,17 +84,21 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -501,37 +505,37 @@ STAGE PLANS: outputColumnNames: c, d Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c, 'hll') + aggregations: max(length(c)), avg(COALESCE(length(c),0)), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') keys: d (type: int) minReductionHashAggr: 0.98381877 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 1180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 5 Data size: 1180 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 5 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -978,16 +982,16 @@ STAGE PLANS: outputColumnNames: c, d Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c, 'hll'), compute_stats(d, 'hll') + aggregations: min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), max(length(d)), avg(COALESCE(length(d),0)), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), compute_bit_vector(d, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Filter Operator predicate: ((key % 2) = 1) (type: boolean) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE @@ -1008,48 +1012,56 @@ STAGE PLANS: outputColumnNames: c, d Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c, 'hll'), compute_stats(d, 'hll') + aggregations: min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), max(length(d)), avg(COALESCE(length(d),0)), count(CASE WHEN (d is null) THEN (1) ELSE (null) END), compute_bit_vector(d, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out index 8eb76b87ac..976ee95cf8 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out @@ -89,19 +89,19 @@ STAGE PLANS: outputColumnNames: id1, part1 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id1, 'hll') + aggregations: min(id1), max(id1), count(CASE WHEN (id1 is null) THEN (1) ELSE (null) END), compute_bit_vector(id1, 'hll') keys: part1 (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -130,37 +130,37 @@ STAGE PLANS: outputColumnNames: id1, part1 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id1, 'hll') + aggregations: min(id1), max(id1), count(CASE WHEN (id1 is null) THEN (1) ELSE (null) END), compute_bit_vector(id1, 'hll') keys: part1 (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out index eac9094739..1216761bbb 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out @@ -102,19 +102,19 @@ STAGE PLANS: outputColumnNames: id1, part1 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id1, 'hll') + aggregations: min(id1), max(id1), count(CASE WHEN (id1 is null) THEN (1) ELSE (null) END), compute_bit_vector(id1, 'hll') keys: part1 (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -143,37 +143,37 @@ STAGE PLANS: outputColumnNames: id1, part1 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id1, 'hll') + aggregations: min(id1), max(id1), count(CASE WHEN (id1 is null) THEN (1) ELSE (null) END), compute_bit_vector(id1, 'hll') keys: part1 (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out index 8b7e705955..7b9f51e9ff 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out @@ -226,31 +226,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 85952 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -277,31 +281,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -1187,31 +1195,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 85952 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -1238,31 +1250,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2182,31 +2198,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 85952 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -2233,31 +2253,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3169,31 +3193,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -3220,31 +3248,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -4102,16 +4134,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -4138,46 +4170,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 125 Data size: 34000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/udf1.q.out b/ql/src/test/results/clientpositive/llap/udf1.q.out index 05e23f0068..e01a4ec0a6 100644 --- a/ql/src/test/results/clientpositive/llap/udf1.q.out +++ b/ql/src/test/results/clientpositive/llap/udf1.q.out @@ -76,33 +76,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 Statistics: Num rows: 250 Data size: 442000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll'), compute_stats(c12, 'hll'), compute_stats(c13, 'hll'), compute_stats(c14, 'hll'), compute_stats(c15, 'hll'), compute_stats(c16, 'hll'), compute_stats(c17, 'hll'), compute_stats(c18, 'hll'), compute_stats(c19, 'hll'), compute_stats(c20, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), max(length(c3)), avg(COALESCE(length(c3),0)), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), max(length(c5)), avg(COALESCE(length(c5),0)), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll'), max(length(c6)), avg(COALESCE(length(c6),0)), count(CASE WHEN (c6 is null) THEN (1) ELSE (null) END), compute_bit_vector(c6, 'hll'), max(length(c7)), avg(COALESCE(length(c7),0)), count(CASE WHEN (c7 is null) THEN (1) ELSE (null) END), compute_bit_vector(c7, 'hll'), max(length(c8)), avg(COALESCE(length(c8),0)), count(CASE WHEN (c8 is null) THEN (1) ELSE (null) END), compute_bit_vector(c8, 'hll'), max(length(c9)), avg(COALESCE(length(c9),0)), count(CASE WHEN (c9 is null) THEN (1) ELSE (null) END), compute_bit_vector(c9, 'hll'), max(length(c10)), avg(COALESCE(length(c10),0)), count(CASE WHEN (c10 is null) THEN (1) ELSE (null) END), compute_bit_vector(c10, 'hll'), max(length(c11)), avg(COALESCE(length(c11),0)), count(CASE WHEN (c11 is null) THEN (1) ELSE (null) END), compute_bit_vector(c11, 'hll'), max(length(c12)), avg(COALESCE(length(c12),0)), count(CASE WHEN (c12 is null) THEN (1) ELSE (null) END), compute_bit_vector(c12, 'hll'), max(length(c13)), avg(COALESCE(length(c13),0)), count(CASE WHEN (c13 is null) THEN (1) ELSE (null) END), compute_bit_vector(c13, 'hll'), max(length(c14)), avg(COALESCE(length(c14),0)), count(CASE WHEN (c14 is null) THEN (1) ELSE (null) END), compute_bit_vector(c14, 'hll'), max(length(c15)), avg(COALESCE(length(c15),0)), count(CASE WHEN (c15 is null) THEN (1) ELSE (null) END), compute_bit_vector(c15, 'hll'), max(length(c16)), avg(COALESCE(length(c16),0)), count(CASE WHEN (c16 is null) THEN (1) ELSE (null) END), compute_bit_vector(c16, 'hll'), max(length(c17)), avg(COALESCE(length(c17),0)), count(CASE WHEN (c17 is null) THEN (1) ELSE (null) END), compute_bit_vector(c17, 'hll'), max(length(c18)), avg(COALESCE(length(c18),0)), count(CASE WHEN (c18 is null) THEN (1) ELSE (null) END), compute_bit_vector(c18, 'hll'), max(length(c19)), avg(COALESCE(length(c19),0)), count(CASE WHEN (c19 is null) THEN (1) ELSE (null) END), compute_bit_vector(c19, 'hll'), max(length(c20)), avg(COALESCE(length(c20),0)), count(CASE WHEN (c20 is null) THEN (1) ELSE (null) END), compute_bit_vector(c20, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65, _col66, _col67, _col68, _col69, _col70, _col71, _col72, _col73, _col74, _col75, _col76, _col77, _col78, _col79 + Statistics: Num rows: 1 Data size: 4640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: struct), _col16 (type: struct), _col17 (type: struct), _col18 (type: struct), _col19 (type: struct) + Statistics: Num rows: 1 Data size: 4640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: struct), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: struct), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: struct), _col22 (type: bigint), _col23 (type: binary), _col24 (type: int), _col25 (type: struct), _col26 (type: bigint), _col27 (type: binary), _col28 (type: int), _col29 (type: struct), _col30 (type: bigint), _col31 (type: binary), _col32 (type: int), _col33 (type: struct), _col34 (type: bigint), _col35 (type: binary), _col36 (type: int), _col37 (type: struct), _col38 (type: bigint), _col39 (type: binary), _col40 (type: int), _col41 (type: struct), _col42 (type: bigint), _col43 (type: binary), _col44 (type: int), _col45 (type: struct), _col46 (type: bigint), _col47 (type: binary), _col48 (type: int), _col49 (type: struct), _col50 (type: bigint), _col51 (type: binary), _col52 (type: int), _col53 (type: struct), _col54 (type: bigint), _col55 (type: binary), _col56 (type: int), _col57 (type: struct), _col58 (type: bigint), _col59 (type: binary), _col60 (type: int), _col61 (type: struct), _col62 (type: bigint), _col63 (type: binary), _col64 (type: int), _col65 (type: struct), _col66 (type: bigint), _col67 (type: binary), _col68 (type: int), _col69 (type: struct), _col70 (type: bigint), _col71 (type: binary), _col72 (type: int), _col73 (type: struct), _col74 (type: bigint), _col75 (type: binary), _col76 (type: int), _col77 (type: struct), _col78 (type: bigint), _col79 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8), compute_stats(VALUE._col9), compute_stats(VALUE._col10), compute_stats(VALUE._col11), compute_stats(VALUE._col12), compute_stats(VALUE._col13), compute_stats(VALUE._col14), compute_stats(VALUE._col15), compute_stats(VALUE._col16), compute_stats(VALUE._col17), compute_stats(VALUE._col18), compute_stats(VALUE._col19) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), max(VALUE._col16), avg(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), max(VALUE._col20), avg(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23), max(VALUE._col24), avg(VALUE._col25), count(VALUE._col26), compute_bit_vector(VALUE._col27), max(VALUE._col28), avg(VALUE._col29), count(VALUE._col30), compute_bit_vector(VALUE._col31), max(VALUE._col32), avg(VALUE._col33), count(VALUE._col34), compute_bit_vector(VALUE._col35), max(VALUE._col36), avg(VALUE._col37), count(VALUE._col38), compute_bit_vector(VALUE._col39), max(VALUE._col40), avg(VALUE._col41), count(VALUE._col42), compute_bit_vector(VALUE._col43), max(VALUE._col44), avg(VALUE._col45), count(VALUE._col46), compute_bit_vector(VALUE._col47), max(VALUE._col48), avg(VALUE._col49), count(VALUE._col50), compute_bit_vector(VALUE._col51), max(VALUE._col52), avg(VALUE._col53), count(VALUE._col54), compute_bit_vector(VALUE._col55), max(VALUE._col56), avg(VALUE._col57), count(VALUE._col58), compute_bit_vector(VALUE._col59), max(VALUE._col60), avg(VALUE._col61), count(VALUE._col62), compute_bit_vector(VALUE._col63), max(VALUE._col64), avg(VALUE._col65), count(VALUE._col66), compute_bit_vector(VALUE._col67), max(VALUE._col68), avg(VALUE._col69), count(VALUE._col70), compute_bit_vector(VALUE._col71), max(VALUE._col72), avg(VALUE._col73), count(VALUE._col74), compute_bit_vector(VALUE._col75), max(VALUE._col76), avg(VALUE._col77), count(VALUE._col78), compute_bit_vector(VALUE._col79) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65, _col66, _col67, _col68, _col69, _col70, _col71, _col72, _col73, _col74, _col75, _col76, _col77, _col78, _col79 + Statistics: Num rows: 1 Data size: 3280 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col16,0)) (type: bigint), COALESCE(_col17,0) (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col20,0)) (type: bigint), COALESCE(_col21,0) (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col24,0)) (type: bigint), COALESCE(_col25,0) (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col28,0)) (type: bigint), COALESCE(_col29,0) (type: double), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col32,0)) (type: bigint), COALESCE(_col33,0) (type: double), _col34 (type: bigint), COALESCE(ndv_compute_bit_vector(_col35),0) (type: bigint), _col35 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col36,0)) (type: bigint), COALESCE(_col37,0) (type: double), _col38 (type: bigint), COALESCE(ndv_compute_bit_vector(_col39),0) (type: bigint), _col39 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col40,0)) (type: bigint), COALESCE(_col41,0) (type: double), _col42 (type: bigint), COALESCE(ndv_compute_bit_vector(_col43),0) (type: bigint), _col43 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col44,0)) (type: bigint), COALESCE(_col45,0) (type: double), _col46 (type: bigint), COALESCE(ndv_compute_bit_vector(_col47),0) (type: bigint), _col47 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col48,0)) (type: bigint), COALESCE(_col49,0) (type: double), _col50 (type: bigint), COALESCE(ndv_compute_bit_vector(_col51),0) (type: bigint), _col51 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col52,0)) (type: bigint), COALESCE(_col53,0) (type: double), _col54 (type: bigint), COALESCE(ndv_compute_bit_vector(_col55),0) (type: bigint), _col55 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col56,0)) (type: bigint), COALESCE(_col57,0) (type: double), _col58 (type: bigint), COALESCE(ndv_compute_bit_vector(_col59),0) (type: bigint), _col59 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col60,0)) (type: bigint), COALESCE(_col61,0) (type: double), _col62 (type: bigint), COALESCE(ndv_compute_bit_vector(_col63),0) (type: bigint), _col63 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col64,0)) (type: bigint), COALESCE(_col65,0) (type: double), _col66 (type: bigint), COALESCE(ndv_compute_bit_vector(_col67),0) (type: bigint), _col67 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col68,0)) (type: bigint), COALESCE(_col69,0) (type: double), _col70 (type: bigint), COALESCE(ndv_compute_bit_vector(_col71),0) (type: bigint), _col71 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col72,0)) (type: bigint), COALESCE(_col73,0) (type: double), _col74 (type: bigint), COALESCE(ndv_compute_bit_vector(_col75),0) (type: bigint), _col75 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col76,0)) (type: bigint), COALESCE(_col77,0) (type: double), _col78 (type: bigint), COALESCE(ndv_compute_bit_vector(_col79),0) (type: bigint), _col79 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65, _col66, _col67, _col68, _col69, _col70, _col71, _col72, _col73, _col74, _col75, _col76, _col77, _col78, _col79, _col80, _col81, _col82, _col83, _col84, _col85, _col86, _col87, _col88, _col89, _col90, _col91, _col92, _col93, _col94, _col95, _col96, _col97, _col98, _col99, _col100, _col101, _col102, _col103, _col104, _col105, _col106, _col107, _col108, _col109, _col110, _col111, _col112, _col113, _col114, _col115, _col116, _col117, _col118, _col119 + Statistics: Num rows: 1 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf3.q.out b/ql/src/test/results/clientpositive/llap/udf3.q.out index a3c993b781..2fd6a456c5 100644 --- a/ql/src/test/results/clientpositive/llap/udf3.q.out +++ b/ql/src/test/results/clientpositive/llap/udf3.q.out @@ -77,17 +77,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5 Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(CASE WHEN (c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(c2, 'hll'), max(length(c3)), avg(COALESCE(length(c3),0)), count(CASE WHEN (c3 is null) THEN (1) ELSE (null) END), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(CASE WHEN (c4 is null) THEN (1) ELSE (null) END), compute_bit_vector(c4, 'hll'), max(length(c5)), avg(COALESCE(length(c5),0)), count(CASE WHEN (c5 is null) THEN (1) ELSE (null) END), compute_bit_vector(c5, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 820 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col12,0)) (type: bigint), COALESCE(_col13,0) (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col16,0)) (type: bigint), COALESCE(_col17,0) (type: double), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_10_trims.q.out b/ql/src/test/results/clientpositive/llap/udf_10_trims.q.out index ddf22640a1..f98ab42d63 100644 --- a/ql/src/test/results/clientpositive/llap/udf_10_trims.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_10_trims.q.out @@ -62,33 +62,37 @@ STAGE PLANS: outputColumnNames: c1 Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_character_length.q.out b/ql/src/test/results/clientpositive/llap/udf_character_length.q.out index e4c22c6567..bc9a8452cd 100644 --- a/ql/src/test/results/clientpositive/llap/udf_character_length.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_character_length.q.out @@ -82,33 +82,37 @@ STAGE PLANS: outputColumnNames: len Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(len, 'hll') + aggregations: min(len), max(len), count(CASE WHEN (len is null) THEN (1) ELSE (null) END), compute_bit_vector(len, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_length.q.out b/ql/src/test/results/clientpositive/llap/udf_length.q.out index 7a5bfba7a9..df0e12b2cb 100644 --- a/ql/src/test/results/clientpositive/llap/udf_length.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_length.q.out @@ -65,33 +65,37 @@ STAGE PLANS: outputColumnNames: len Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(len, 'hll') + aggregations: min(len), max(len), count(CASE WHEN (len is null) THEN (1) ELSE (null) END), compute_bit_vector(len, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_octet_length.q.out b/ql/src/test/results/clientpositive/llap/udf_octet_length.q.out index f0117794f6..a10e9b2da9 100644 --- a/ql/src/test/results/clientpositive/llap/udf_octet_length.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_octet_length.q.out @@ -65,33 +65,37 @@ STAGE PLANS: outputColumnNames: len Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(len, 'hll') + aggregations: min(len), max(len), count(CASE WHEN (len is null) THEN (1) ELSE (null) END), compute_bit_vector(len, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_reverse.q.out b/ql/src/test/results/clientpositive/llap/udf_reverse.q.out index 03e3a2126c..1904eaf509 100644 --- a/ql/src/test/results/clientpositive/llap/udf_reverse.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_reverse.q.out @@ -65,33 +65,37 @@ STAGE PLANS: outputColumnNames: len Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(len, 'hll') + aggregations: max(length(len)), avg(COALESCE(length(len),0)), count(CASE WHEN (len is null) THEN (1) ELSE (null) END), compute_bit_vector(len, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/union10.q.out b/ql/src/test/results/clientpositive/llap/union10.q.out index d139bea3a8..d88286743d 100644 --- a/ql/src/test/results/clientpositive/llap/union10.q.out +++ b/ql/src/test/results/clientpositive/llap/union10.q.out @@ -132,31 +132,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -186,16 +190,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -225,16 +229,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union12.q.out b/ql/src/test/results/clientpositive/llap/union12.q.out index 480848068a..d2fe38b720 100644 --- a/ql/src/test/results/clientpositive/llap/union12.q.out +++ b/ql/src/test/results/clientpositive/llap/union12.q.out @@ -136,31 +136,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -190,16 +194,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -229,16 +233,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union17.q.out b/ql/src/test/results/clientpositive/llap/union17.q.out index 1b32388013..7310ce3c60 100644 --- a/ql/src/test/results/clientpositive/llap/union17.q.out +++ b/ql/src/test/results/clientpositive/llap/union17.q.out @@ -176,31 +176,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -227,31 +231,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union18.q.out b/ql/src/test/results/clientpositive/llap/union18.q.out index 2538051311..0b973210f7 100644 --- a/ql/src/test/results/clientpositive/llap/union18.q.out +++ b/ql/src/test/results/clientpositive/llap/union18.q.out @@ -95,16 +95,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -122,16 +122,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -159,16 +159,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -186,46 +186,54 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union19.q.out b/ql/src/test/results/clientpositive/llap/union19.q.out index 966a053370..6c37f2cd3b 100644 --- a/ql/src/test/results/clientpositive/llap/union19.q.out +++ b/ql/src/test/results/clientpositive/llap/union19.q.out @@ -114,16 +114,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -169,16 +169,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -205,46 +205,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union22.q.out b/ql/src/test/results/clientpositive/llap/union22.q.out index 7167f7d085..00526d058f 100644 --- a/ql/src/test/results/clientpositive/llap/union22.q.out +++ b/ql/src/test/results/clientpositive/llap/union22.q.out @@ -156,12 +156,12 @@ STAGE PLANS: outputColumnNames: k1, k2, k3, k4, ds Statistics: Num rows: 387 Data size: 141297 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll'), max(length(k3)), avg(COALESCE(length(k3),0)), count(CASE WHEN (k3 is null) THEN (1) ELSE (null) END), compute_bit_vector(k3, 'hll'), max(length(k4)), avg(COALESCE(length(k4),0)), count(CASE WHEN (k4 is null) THEN (1) ELSE (null) END), compute_bit_vector(k4, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 1013 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -169,9 +169,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1013 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -286,12 +286,12 @@ STAGE PLANS: outputColumnNames: k1, k2, k3, k4, ds Statistics: Num rows: 387 Data size: 141297 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll'), max(length(k3)), avg(COALESCE(length(k3),0)), count(CASE WHEN (k3 is null) THEN (1) ELSE (null) END), compute_bit_vector(k3, 'hll'), max(length(k4)), avg(COALESCE(length(k4),0)), count(CASE WHEN (k4 is null) THEN (1) ELSE (null) END), compute_bit_vector(k4, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 1013 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -299,9 +299,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1013 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -420,30 +420,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 741 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/union25.q.out b/ql/src/test/results/clientpositive/llap/union25.q.out index 144582195d..53e74f768e 100644 --- a/ql/src/test/results/clientpositive/llap/union25.q.out +++ b/ql/src/test/results/clientpositive/llap/union25.q.out @@ -188,31 +188,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 408 Data size: 75888 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/union28.q.out b/ql/src/test/results/clientpositive/llap/union28.q.out index 46505a54d8..9f66fe5efe 100644 --- a/ql/src/test/results/clientpositive/llap/union28.q.out +++ b/ql/src/test/results/clientpositive/llap/union28.q.out @@ -79,16 +79,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -141,17 +141,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -177,16 +181,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -212,16 +216,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/union29.q.out b/ql/src/test/results/clientpositive/llap/union29.q.out index 09bba31bbe..0453515f69 100644 --- a/ql/src/test/results/clientpositive/llap/union29.q.out +++ b/ql/src/test/results/clientpositive/llap/union29.q.out @@ -79,16 +79,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -117,16 +117,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -155,33 +155,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/union30.q.out b/ql/src/test/results/clientpositive/llap/union30.q.out index 37cb6d6764..9d57b376ba 100644 --- a/ql/src/test/results/clientpositive/llap/union30.q.out +++ b/ql/src/test/results/clientpositive/llap/union30.q.out @@ -94,16 +94,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -132,16 +132,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -194,17 +194,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -230,16 +234,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -265,16 +269,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/union31.q.out b/ql/src/test/results/clientpositive/llap/union31.q.out index 48df6c38a7..73330abfd5 100644 --- a/ql/src/test/results/clientpositive/llap/union31.q.out +++ b/ql/src/test/results/clientpositive/llap/union31.q.out @@ -217,31 +217,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -268,31 +272,35 @@ STAGE PLANS: outputColumnNames: value, cnt Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -557,16 +565,16 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -590,46 +598,54 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -955,16 +971,16 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -988,46 +1004,54 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union33.q.out b/ql/src/test/results/clientpositive/llap/union33.q.out index 8fd7573715..1f03c64782 100644 --- a/ql/src/test/results/clientpositive/llap/union33.q.out +++ b/ql/src/test/results/clientpositive/llap/union33.q.out @@ -71,16 +71,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -112,17 +112,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -165,16 +169,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Union 2 Vertex: Union 2 @@ -322,16 +326,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -376,31 +380,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/union4.q.out b/ql/src/test/results/clientpositive/llap/union4.q.out index 6e412cb773..32b0859974 100644 --- a/ql/src/test/results/clientpositive/llap/union4.q.out +++ b/ql/src/test/results/clientpositive/llap/union4.q.out @@ -107,31 +107,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -161,16 +165,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union6.q.out b/ql/src/test/results/clientpositive/llap/union6.q.out index 5a533b5267..70dda9e493 100644 --- a/ql/src/test/results/clientpositive/llap/union6.q.out +++ b/ql/src/test/results/clientpositive/llap/union6.q.out @@ -82,16 +82,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -119,31 +119,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index 5a5396e10d..c707456ce4 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -181,31 +181,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized Reduce Operator Tree: @@ -488,31 +492,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized Reduce Operator Tree: @@ -1177,16 +1185,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 115000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -1213,46 +1221,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 125 Data size: 34500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -2096,16 +2112,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 69000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -2123,46 +2139,54 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 115000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -3016,16 +3040,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 125 Data size: 34500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -3043,46 +3067,54 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 115000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(CASE WHEN (val1 is null) THEN (1) ELSE (null) END), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(CASE WHEN (val2 is null) THEN (1) ELSE (null) END), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -4181,12 +4213,12 @@ STAGE PLANS: outputColumnNames: k1, k2, k3, k4, ds Statistics: Num rows: 387 Data size: 141297 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(CASE WHEN (k1 is null) THEN (1) ELSE (null) END), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(CASE WHEN (k2 is null) THEN (1) ELSE (null) END), compute_bit_vector(k2, 'hll'), max(length(k3)), avg(COALESCE(length(k3),0)), count(CASE WHEN (k3 is null) THEN (1) ELSE (null) END), compute_bit_vector(k3, 'hll'), max(length(k4)), avg(COALESCE(length(k4),0)), count(CASE WHEN (k4 is null) THEN (1) ELSE (null) END), compute_bit_vector(k4, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 1013 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -4194,39 +4226,39 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1013 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) auto parallelism: true Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), max(VALUE._col12), avg(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 741 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), _col15 (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 directory: hdfs://### HDFS PATH ### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE Stats Publishing Key Prefix: hdfs://### HDFS PATH ### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -6780,31 +6812,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 187 Data size: 34782 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -8509,31 +8545,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized Reduce Operator Tree: @@ -8811,31 +8851,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -9511,31 +9555,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized Reduce Operator Tree: @@ -9835,16 +9883,16 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Select Operator expressions: _col1 (type: string) outputColumnNames: _col1 @@ -9867,17 +9915,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -9904,31 +9956,35 @@ STAGE PLANS: outputColumnNames: value, cnt Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -10212,16 +10268,16 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -10245,46 +10301,54 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized Reduce Operator Tree: @@ -10604,16 +10668,16 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -10637,46 +10701,54 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(CASE WHEN (c1 is null) THEN (1) ELSE (null) END), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(CASE WHEN (cnt is null) THEN (1) ELSE (null) END), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -11666,31 +11738,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized Reduce Operator Tree: @@ -11908,31 +11984,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_3.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_3.q.out index 2356da645f..07b0f80ef9 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_3.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_3.q.out @@ -262,31 +262,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 13 Data size: 3536 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9230769 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union_lateralview.q.out b/ql/src/test/results/clientpositive/llap/union_lateralview.q.out index d9d4611d78..04b1ac39b4 100644 --- a/ql/src/test/results/clientpositive/llap/union_lateralview.q.out +++ b/ql/src/test/results/clientpositive/llap/union_lateralview.q.out @@ -215,31 +215,35 @@ STAGE PLANS: outputColumnNames: key, arr_ele, value Statistics: Num rows: 3104 Data size: 294880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(arr_ele, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(arr_ele), max(arr_ele), count(CASE WHEN (arr_ele is null) THEN (1) ELSE (null) END), compute_bit_vector(arr_ele, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/union_top_level.q.out b/ql/src/test/results/clientpositive/llap/union_top_level.q.out index e0d86b3989..342a7c6702 100644 --- a/ql/src/test/results/clientpositive/llap/union_top_level.q.out +++ b/ql/src/test/results/clientpositive/llap/union_top_level.q.out @@ -554,31 +554,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -606,16 +610,16 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -643,16 +647,16 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Union 3 Vertex: Union 3 @@ -862,31 +866,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -914,16 +922,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -951,16 +959,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Union 3 Vertex: Union 3 @@ -1158,31 +1166,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1210,16 +1222,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -1247,16 +1259,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value), max(value), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out b/ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out index 9e8d6a07cf..e2f478a39c 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out @@ -80,44 +80,48 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -254,44 +258,48 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: no inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out index 738ba89bb2..85f6320d00 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out @@ -719,7 +719,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Select Operator @@ -739,17 +739,21 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DECIMAL' (type: string), _col0 (type: decimal(11,5)), _col1 (type: decimal(11,5)), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), UDFToLong(_col4) (type: bigint), UDFToLong(_col5) (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out index f8d6e2d069..5054b1c8d2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out @@ -167,46 +167,51 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueExpressions: StringLength(col 0:string) -> 1:int, VectorCoalesce(columns [2, 3])(children: StringLength(col 0:string) -> 2:int, ConstantVectorExpression(val 0) -> 3:int) -> 4:int, IfExprColumnNull(col 5:boolean, col 6:int, null)(children: IsNull(col 0:string) -> 5:boolean, ConstantVectorExpression(val 1) -> 6:int) -> 7:int Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), c1 (type: string) Reducer 4 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll') mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Reducer 5 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out index 2042f6d508..f389b45e02 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out @@ -167,46 +167,51 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueExpressions: StringLength(col 0:string) -> 1:int, VectorCoalesce(columns [2, 3])(children: StringLength(col 0:string) -> 2:int, ConstantVectorExpression(val 0) -> 3:int) -> 4:int, IfExprColumnNull(col 5:boolean, col 6:int, null)(children: IsNull(col 0:string) -> 5:boolean, ConstantVectorExpression(val 1) -> 6:int) -> 7:int Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), CASE WHEN (c1 is null) THEN (1) ELSE (null) END (type: int), c1 (type: string) Reducer 4 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3, 'hll') mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary) Reducer 5 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 96c83be6b2..fbeb2e8b70 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -1142,7 +1142,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator @@ -1169,36 +1169,40 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(val), max(val), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -1248,7 +1252,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator @@ -1275,36 +1279,40 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(CASE WHEN (key1 is null) THEN (1) ELSE (null) END), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(CASE WHEN (key2 is null) THEN (1) ELSE (null) END), compute_bit_vector(key2, 'hll'), min(val), max(val), count(CASE WHEN (val is null) THEN (1) ELSE (null) END), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 7 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 948 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index beb3eb503d..bcf8a68a7f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -184,7 +184,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_01_n0 POSTHOOK: Output: default@e011_01_n0 #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_02_n0 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_02_n0 @@ -195,7 +195,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_02_n0 POSTHOOK: Output: default@e011_02_n0 #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_03_n0 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_03_n0 @@ -206,7 +206,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_03_n0 POSTHOOK: Output: default@e011_03_n0 #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_01_small @@ -217,7 +217,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_01_small POSTHOOK: Output: default@e011_01_small #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_02_small @@ -228,7 +228,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_02_small POSTHOOK: Output: default@e011_02_small #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_03_small @@ -239,7 +239,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_03_small POSTHOOK: Output: default@e011_03_small #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: explain vectorization detail select sum(sum(c1)) over() from e011_01_n0 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out index e1a10916e5..fed155cae4 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -5698,27 +5698,31 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), min(VALUE._col20), max(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'LONG' (type: string), UDFToLong(_col20) (type: bigint), UDFToLong(_col21) (type: bigint), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1588 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1588 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 2 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5776,36 +5780,40 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, r, dr, s Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(CASE WHEN (p_mfgr is null) THEN (1) ELSE (null) END), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(CASE WHEN (p_name is null) THEN (1) ELSE (null) END), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(CASE WHEN (p_size is null) THEN (1) ELSE (null) END), compute_bit_vector(p_size, 'hll'), min(r), max(r), count(CASE WHEN (r is null) THEN (1) ELSE (null) END), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(CASE WHEN (dr is null) THEN (1) ELSE (null) END), compute_bit_vector(dr, 'hll'), min(s), max(s), count(CASE WHEN (s is null) THEN (1) ELSE (null) END), compute_bit_vector(s, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), _col23 (type: binary) Reducer 3 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), min(VALUE._col20), max(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Vectorization: @@ -5956,36 +5964,40 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, r, dr, cud, s2, fv1 Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(s2, 'hll'), compute_stats(fv1, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(CASE WHEN (p_mfgr is null) THEN (1) ELSE (null) END), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(CASE WHEN (p_name is null) THEN (1) ELSE (null) END), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(CASE WHEN (p_size is null) THEN (1) ELSE (null) END), compute_bit_vector(p_size, 'hll'), min(r), max(r), count(CASE WHEN (r is null) THEN (1) ELSE (null) END), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(CASE WHEN (dr is null) THEN (1) ELSE (null) END), compute_bit_vector(dr, 'hll'), min(cud), max(cud), count(CASE WHEN (cud is null) THEN (1) ELSE (null) END), compute_bit_vector(cud, 'hll'), min(s2), max(s2), count(CASE WHEN (s2 is null) THEN (1) ELSE (null) END), compute_bit_vector(s2, 'hll'), min(fv1), max(fv1), count(CASE WHEN (fv1 is null) THEN (1) ELSE (null) END), compute_bit_vector(fv1, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: int), _col22 (type: bigint), _col23 (type: binary), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), _col27 (type: binary), _col28 (type: int), _col29 (type: int), _col30 (type: bigint), _col31 (type: binary) Reducer 7 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), min(VALUE._col20), max(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23), min(VALUE._col24), max(VALUE._col25), count(VALUE._col26), compute_bit_vector(VALUE._col27), min(VALUE._col28), max(VALUE._col29), count(VALUE._col30), compute_bit_vector(VALUE._col31) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'LONG' (type: string), UDFToLong(_col20) (type: bigint), UDFToLong(_col21) (type: bigint), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'LONG' (type: string), UDFToLong(_col28) (type: bigint), UDFToLong(_col29) (type: bigint), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47 + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized, llap Reduce Vectorization: @@ -6120,16 +6132,16 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, c, ca, fv Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(c, 'hll'), compute_stats(ca, 'hll'), compute_stats(fv, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(CASE WHEN (p_mfgr is null) THEN (1) ELSE (null) END), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(CASE WHEN (p_name is null) THEN (1) ELSE (null) END), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(CASE WHEN (p_size is null) THEN (1) ELSE (null) END), compute_bit_vector(p_size, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), min(ca), max(ca), count(CASE WHEN (ca is null) THEN (1) ELSE (null) END), compute_bit_vector(ca, 'hll'), min(fv), max(fv), count(CASE WHEN (fv is null) THEN (1) ELSE (null) END), compute_bit_vector(fv, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: int), _col22 (type: bigint), _col23 (type: binary) Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out index 314300f371..17b9bc3975 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out @@ -897,7 +897,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Select Operator @@ -945,36 +945,40 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 4096 Data size: 16396 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary) Reducer 3 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DOUBLE' (type: string), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index d7f8f73b9b..36de0db5a6 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -109,7 +109,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator @@ -134,36 +134,40 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 500882866c..dfc6239034 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -4213,7 +4213,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4271,36 +4271,40 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, r, dr, s Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(CASE WHEN (p_mfgr is null) THEN (1) ELSE (null) END), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(CASE WHEN (p_name is null) THEN (1) ELSE (null) END), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(CASE WHEN (p_size is null) THEN (1) ELSE (null) END), compute_bit_vector(p_size, 'hll'), min(r), max(r), count(CASE WHEN (r is null) THEN (1) ELSE (null) END), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(CASE WHEN (dr is null) THEN (1) ELSE (null) END), compute_bit_vector(dr, 'hll'), min(s), max(s), count(CASE WHEN (s is null) THEN (1) ELSE (null) END), compute_bit_vector(s, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), _col23 (type: binary) Reducer 4 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), min(VALUE._col20), max(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'DOUBLE' (type: string), _col20 (type: double), _col21 (type: double), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Vectorization: @@ -4414,36 +4418,40 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(s2, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(fv1, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(CASE WHEN (p_mfgr is null) THEN (1) ELSE (null) END), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(CASE WHEN (p_name is null) THEN (1) ELSE (null) END), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(CASE WHEN (p_size is null) THEN (1) ELSE (null) END), compute_bit_vector(p_size, 'hll'), min(s2), max(s2), count(CASE WHEN (s2 is null) THEN (1) ELSE (null) END), compute_bit_vector(s2, 'hll'), min(r), max(r), count(CASE WHEN (r is null) THEN (1) ELSE (null) END), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(CASE WHEN (dr is null) THEN (1) ELSE (null) END), compute_bit_vector(dr, 'hll'), min(cud), max(cud), count(CASE WHEN (cud is null) THEN (1) ELSE (null) END), compute_bit_vector(cud, 'hll'), min(fv1), max(fv1), count(CASE WHEN (fv1 is null) THEN (1) ELSE (null) END), compute_bit_vector(fv1, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: int), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary), _col16 (type: int), _col17 (type: int), _col18 (type: bigint), _col19 (type: binary), _col20 (type: int), _col21 (type: int), _col22 (type: bigint), _col23 (type: binary), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), _col27 (type: binary), _col28 (type: int), _col29 (type: int), _col30 (type: bigint), _col31 (type: binary) Reducer 7 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15), min(VALUE._col16), max(VALUE._col17), count(VALUE._col18), compute_bit_vector(VALUE._col19), min(VALUE._col20), max(VALUE._col21), count(VALUE._col22), compute_bit_vector(VALUE._col23), min(VALUE._col24), max(VALUE._col25), count(VALUE._col26), compute_bit_vector(VALUE._col27), min(VALUE._col28), max(VALUE._col29), count(VALUE._col30), compute_bit_vector(VALUE._col31) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), UDFToLong(_col8) (type: bigint), UDFToLong(_col9) (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), 'LONG' (type: string), UDFToLong(_col16) (type: bigint), UDFToLong(_col17) (type: bigint), _col18 (type: bigint), COALESCE(ndv_compute_bit_vector(_col19),0) (type: bigint), _col19 (type: binary), 'LONG' (type: string), UDFToLong(_col20) (type: bigint), UDFToLong(_col21) (type: bigint), _col22 (type: bigint), COALESCE(ndv_compute_bit_vector(_col23),0) (type: bigint), _col23 (type: binary), 'DOUBLE' (type: string), _col24 (type: double), _col25 (type: double), _col26 (type: bigint), COALESCE(ndv_compute_bit_vector(_col27),0) (type: bigint), _col27 (type: binary), 'LONG' (type: string), UDFToLong(_col28) (type: bigint), UDFToLong(_col29) (type: bigint), _col30 (type: bigint), COALESCE(ndv_compute_bit_vector(_col31),0) (type: bigint), _col31 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47 + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/nonmr_fetch.q.out b/ql/src/test/results/clientpositive/nonmr_fetch.q.out index 9e2c40d157..c2af77f237 100644 --- a/ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ b/ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -1351,11 +1351,11 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1392,22 +1392,26 @@ STAGE PLANS: Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: explain analyze table src compute statistics PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/nullformatCTAS.q.out b/ql/src/test/results/clientpositive/nullformatCTAS.q.out index 792abe701a..d1dc3abe8b 100644 --- a/ql/src/test/results/clientpositive/nullformatCTAS.q.out +++ b/ql/src/test/results/clientpositive/nullformatCTAS.q.out @@ -81,29 +81,33 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/smb_mapjoin9.q.out b/ql/src/test/results/clientpositive/smb_mapjoin9.q.out index a5588bc8b8..ff245690b6 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin9.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin9.q.out @@ -299,29 +299,33 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4 Statistics: Num rows: 550 Data size: 2200 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll'), min(col4), max(col4), count(CASE WHEN (col4 is null) THEN (1) ELSE (null) END), compute_bit_vector(col4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary), _col12 (type: int), _col13 (type: int), _col14 (type: bigint), _col15 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'LONG' (type: string), UDFToLong(_col12) (type: bigint), UDFToLong(_col13) (type: bigint), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out index 0a8355d03f..09a207f03e 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out @@ -88,12 +88,12 @@ STAGE PLANS: outputColumnNames: key, value1, value2, ds Statistics: Num rows: 500 Data size: 225500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -130,23 +130,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -266,12 +266,12 @@ STAGE PLANS: outputColumnNames: key, value1, value2, ds Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(value1), max(value1), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1389 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -308,23 +308,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1389 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 573 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1491,12 +1491,12 @@ STAGE PLANS: outputColumnNames: key, value1, value2, ds Statistics: Num rows: 500 Data size: 225500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(CASE WHEN (value1 is null) THEN (1) ELSE (null) END), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(CASE WHEN (value2 is null) THEN (1) ELSE (null) END), compute_bit_vector(value2, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1533,23 +1533,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 781 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), _col11 (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out index f5eef92280..03744bacab 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out @@ -167,12 +167,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -209,23 +209,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -306,12 +306,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -348,23 +348,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -446,12 +446,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -488,23 +488,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -586,12 +586,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -628,23 +628,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -726,12 +726,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -768,23 +768,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out index 38da086cac..1417521b6c 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out @@ -102,19 +102,21 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_11] - Group By Operator [GBY_9] (rows=1/1 width=440) - Output:["_col0"],aggregations:["compute_stats(VALUE._col0, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_2] - table:{"name:":"default.t_n28"} - Select Operator [SEL_1] (rows=500/500 width=87) - Output:["_col0"] - TableScan [TS_0] (rows=500/500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - PARTITION_ONLY_SHUFFLE [RS_8] - Select Operator [SEL_7] (rows=500/500 width=87) - Output:["col1"] - Please refer to the previous Select Operator [SEL_1] + Select Operator [SEL_10] (rows=1/1 width=266) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Group By Operator [GBY_9] (rows=1/1 width=164) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.t_n28"} + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500/500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + PARTITION_ONLY_SHUFFLE [RS_8] + Select Operator [SEL_7] (rows=500/500 width=87) + Output:["col1"] + Please refer to the previous Select Operator [SEL_1] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -161,19 +163,21 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_8] - Group By Operator [GBY_6] (rows=1/1 width=440) - Output:["_col0"],aggregations:["compute_stats(VALUE._col0, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_2] - table:{"name:":"default.t_n28"} - Select Operator [SEL_1] (rows=500/500 width=87) - Output:["_col0"] - TableScan [TS_0] (rows=500/500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - PARTITION_ONLY_SHUFFLE [RS_5] - Select Operator [SEL_4] (rows=500/500 width=87) - Output:["key"] - Please refer to the previous Select Operator [SEL_1] + Select Operator [SEL_7] (rows=1/1 width=266) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Group By Operator [GBY_6] (rows=1/1 width=164) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.t_n28"} + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500/500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + PARTITION_ONLY_SHUFFLE [RS_5] + Select Operator [SEL_4] (rows=500/500 width=87) + Output:["key"] + Please refer to the previous Select Operator [SEL_1] PREHOOK: query: select key from src limit 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index 5af43def9c..08b1af7101 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -298,14 +298,16 @@ Stage-2 Stage-0 Reducer 2 File Output Operator [FS_5] - Group By Operator [GBY_3] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=178) - Output:["key","value"] - TableScan [TS_0] (rows=500/500 width=178) - default@src_stats,src_stats,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Select Operator [SEL_4] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_3] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=178) + Output:["key","value"] + TableScan [TS_0] (rows=500/500 width=178) + default@src_stats,src_stats,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x)) PREHOOK: type: CREATEMACRO @@ -412,19 +414,21 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_11] - Group By Operator [GBY_9] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - File Output Operator [FS_13] - table:{"name:":"default.src_autho_test_n4"} - Select Operator [SEL_12] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500/500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_15] - Select Operator [SEL_14] (rows=500/500 width=178) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_12] + Select Operator [SEL_10] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_9] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + File Output Operator [FS_13] + table:{"name:":"default.src_autho_test_n4"} + Select Operator [SEL_12] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500/500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_15] + Select Operator [SEL_14] (rows=500/500 width=178) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_12] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -769,21 +773,23 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_9] - Group By Operator [GBY_7] (rows=1/1 width=2824) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - File Output Operator [FS_14] - table:{"name:":"default.orc_merge5_n1"} - Select Operator [SEL_13] (rows=1/3 width=352) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_12] (rows=1/3 width=352) - predicate:(userid <= 13L) - TableScan [TS_0] (rows=1/15000 width=352) - default@orc_merge5_n1,orc_merge5_n1,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] - PARTITION_ONLY_SHUFFLE [RS_16] - Select Operator [SEL_15] (rows=1/3 width=352) - Output:["userid","string1","subtype","decimal1","ts"] - Please refer to the previous Select Operator [SEL_13] + Select Operator [SEL_8] (rows=1/1 width=1460) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] + Group By Operator [GBY_7] (rows=1/1 width=1460) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"],aggregations:["min(VALUE._col0)","max(VALUE._col0)","count(VALUE._col1)","compute_bit_vector(VALUE._col0, 'hll')","max(VALUE._col3)","avg(VALUE._col4)","count(VALUE._col5)","compute_bit_vector(VALUE._col6, 'hll')","min(VALUE._col7)","max(VALUE._col7)","count(VALUE._col8)","compute_bit_vector(VALUE._col7, 'hll')","min(VALUE._col9)","max(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col9, 'hll')","min(VALUE._col11)","max(VALUE._col11)","count(VALUE._col12)","compute_bit_vector(VALUE._col11, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + File Output Operator [FS_14] + table:{"name:":"default.orc_merge5_n1"} + Select Operator [SEL_13] (rows=1/3 width=352) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_12] (rows=1/3 width=352) + predicate:(userid <= 13L) + TableScan [TS_0] (rows=1/15000 width=352) + default@orc_merge5_n1,orc_merge5_n1,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + PARTITION_ONLY_SHUFFLE [RS_16] + Select Operator [SEL_15] (rows=1/3 width=352) + Output:["userid","string1","subtype","decimal1","ts"] + Please refer to the previous Select Operator [SEL_13] Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out index 5088a3d155..efc9feebaa 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out @@ -64,14 +64,16 @@ Stage-2 Stage-0 Reducer 2 File Output Operator [FS_5] - Group By Operator [GBY_3] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=178) - Output:["key","value"] - TableScan [TS_0] (rows=500/500 width=178) - default@src_stats_n0,src_stats_n0,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Select Operator [SEL_4] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_3] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=178) + Output:["key","value"] + TableScan [TS_0] (rows=500/500 width=178) + default@src_stats_n0,src_stats_n0,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: drop table src_multi2_n7 PREHOOK: type: DROPTABLE @@ -124,54 +126,56 @@ Stage-3 Stage-1 Reducer 5 File Output Operator [FS_25] - Group By Operator [GBY_23] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_19] - table:{"name:":"default.src_multi2_n7"} - Select Operator [SEL_18] (rows=830/508 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_43] (rows=830/508 width=178) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_12] (rows=500/500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=525/319 width=87) - Output:["_col0"] - Group By Operator [GBY_10] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_48] - PartitionCols:_col0, _col1 - Select Operator [SEL_46] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_45] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_44] (rows=500/500 width=178) - Output:["key","value"] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_53] - PartitionCols:_col0, _col1 - Select Operator [SEL_51] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_50] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_49] (rows=25/25 width=175) - Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_22] - Select Operator [SEL_21] (rows=830/508 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_18] + Select Operator [SEL_24] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_23] (rows=1/1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_19] + table:{"name:":"default.src_multi2_n7"} + Select Operator [SEL_18] (rows=830/508 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_43] (rows=830/508 width=178) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col0","_col2"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_31] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_12] (rows=500/500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=525/319 width=87) + Output:["_col0"] + Group By Operator [GBY_10] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_48] + PartitionCols:_col0, _col1 + Select Operator [SEL_46] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_45] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_44] (rows=500/500 width=178) + Output:["key","value"] + <-Map 6 [CONTAINS] + Reduce Output Operator [RS_53] + PartitionCols:_col0, _col1 + Select Operator [SEL_51] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_50] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_49] (rows=25/25 width=175) + Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_22] + Select Operator [SEL_21] (rows=830/508 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_18] PREHOOK: query: select count(*) from (select * from src union select * from src1)subq PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index d886a26f78..d11587c9f1 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -254,16 +254,18 @@ Stage-2 Stage-0 Reducer 2 File Output Operator [FS_6] - Group By Operator [GBY_4] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_1] (rows=500 width=178) - Output:["key","value"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Select Operator [SEL_5] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_4] (rows=1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(CASE WHEN (key is null) THEN (1) ELSE (null) END)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(CASE WHEN (value is null) THEN (1) ELSE (null) END)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_1] (rows=500 width=178) + Output:["key","value"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x)) @@ -340,21 +342,23 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_12] - Group By Operator [GBY_10] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_2] - table:{"name:":"default.src_autho_test_n3"} - Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_9] - Group By Operator [GBY_8] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"] - Select Operator [SEL_7] (rows=500 width=178) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_1] + Select Operator [SEL_11] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_10] (rows=1 width=328) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.src_autho_test_n3"} + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_9] + Group By Operator [GBY_8] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END)","compute_bit_vector(col1, 'hll')","max(length(col2))","avg(COALESCE(length(col2),0))","count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END)","compute_bit_vector(col2, 'hll')"] + Select Operator [SEL_7] (rows=500 width=178) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_1] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -617,23 +621,25 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_10] - Group By Operator [GBY_8] (rows=1 width=2824) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_3] - table:{"name:":"default.orc_merge5_n0"} - Select Operator [SEL_2] (rows=1 width=352) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_11] (rows=1 width=352) - predicate:(userid <= 13L) - TableScan [TS_0] (rows=1 width=352) - default@orc_merge5_n0,orc_merge5_n0,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] - PARTITION_ONLY_SHUFFLE [RS_7] - Group By Operator [GBY_6] (rows=1 width=2760) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(userid, 'hll')","compute_stats(string1, 'hll')","compute_stats(subtype, 'hll')","compute_stats(decimal1, 'hll')","compute_stats(ts, 'hll')"] - Select Operator [SEL_5] (rows=1 width=352) - Output:["userid","string1","subtype","decimal1","ts"] - Please refer to the previous Select Operator [SEL_2] + Select Operator [SEL_9] (rows=1 width=1528) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] + Group By Operator [GBY_8] (rows=1 width=1528) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","max(VALUE._col4)","avg(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)","min(VALUE._col8)","max(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col11)","min(VALUE._col12)","max(VALUE._col13)","count(VALUE._col14)","compute_bit_vector(VALUE._col15)","min(VALUE._col16)","max(VALUE._col17)","count(VALUE._col18)","compute_bit_vector(VALUE._col19)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_3] + table:{"name:":"default.orc_merge5_n0"} + Select Operator [SEL_2] (rows=1 width=352) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_11] (rows=1 width=352) + predicate:(userid <= 13L) + TableScan [TS_0] (rows=1 width=352) + default@orc_merge5_n0,orc_merge5_n0,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + PARTITION_ONLY_SHUFFLE [RS_7] + Group By Operator [GBY_6] (rows=1 width=1528) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"],aggregations:["min(userid)","max(userid)","count(CASE WHEN (userid is null) THEN (1) ELSE (null) END)","compute_bit_vector(userid, 'hll')","max(length(string1))","avg(COALESCE(length(string1),0))","count(CASE WHEN (string1 is null) THEN (1) ELSE (null) END)","compute_bit_vector(string1, 'hll')","min(subtype)","max(subtype)","count(CASE WHEN (subtype is null) THEN (1) ELSE (null) END)","compute_bit_vector(subtype, 'hll')","min(decimal1)","max(decimal1)","count(CASE WHEN (decimal1 is null) THEN (1) ELSE (null) END)","compute_bit_vector(decimal1, 'hll')","min(ts)","max(ts)","count(CASE WHEN (ts is null) THEN (1) ELSE (null) END)","compute_bit_vector(ts, 'hll')"] + Select Operator [SEL_5] (rows=1 width=352) + Output:["userid","string1","subtype","decimal1","ts"] + Please refer to the previous Select Operator [SEL_2] Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) diff --git a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out index cfe4481326..ddf7bfc49f 100644 --- a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out @@ -42,66 +42,68 @@ Stage-3 Stage-1 Reducer 3 File Output Operator [FS_23] - Group By Operator [GBY_21] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Union 2 [CUSTOM_SIMPLE_EDGE] - <-Map 1 [CONTAINS] - File Output Operator [FS_31] - table:{"name:":"default.x"} - Select Operator [SEL_30] (rows=6 width=91) - Output:["_col0","_col1"] - Select Operator [SEL_28] (rows=2 width=91) - Output:["_col1"] - Filter Operator [FIL_27] (rows=2 width=87) - predicate:(key = '238') - TableScan [TS_26] (rows=500 width=87) - Output:["key"] - Reduce Output Operator [RS_34] - Group By Operator [GBY_33] (rows=1 width=864) - Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"] - Select Operator [SEL_32] (rows=6 width=91) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_30] - <-Map 4 [CONTAINS] - File Output Operator [FS_45] - table:{"name:":"default.x"} - Select Operator [SEL_44] (rows=6 width=91) - Output:["_col0","_col1"] - Select Operator [SEL_42] (rows=4 width=87) - Output:["_col1"] - Lateral View Join Operator [LVJ_40] (rows=4 width=239) - Output:["_col5"] - Select Operator [SEL_38] (rows=2 width=431) - Lateral View Forward [LVF_37] (rows=2 width=86) - Filter Operator [FIL_36] (rows=2 width=86) - predicate:(key = '238') - TableScan [TS_35] (rows=25 width=86) - Output:["key"] - Reduce Output Operator [RS_48] - Group By Operator [GBY_47] (rows=1 width=864) - Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"] - Select Operator [SEL_46] (rows=6 width=91) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_44] - File Output Operator [FS_45] - table:{"name:":"default.x"} - Select Operator [SEL_44] (rows=6 width=91) - Output:["_col0","_col1"] - Select Operator [SEL_42] (rows=4 width=87) - Output:["_col1"] - Lateral View Join Operator [LVJ_40] (rows=4 width=239) - Output:["_col5"] - UDTF Operator [UDTF_41] (rows=2 width=48) - function name:explode - Select Operator [SEL_39] (rows=2 width=48) - Output:["_col0"] - Please refer to the previous Lateral View Forward [LVF_37] - Reduce Output Operator [RS_48] - Group By Operator [GBY_47] (rows=1 width=864) - Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"] - Select Operator [SEL_46] (rows=6 width=91) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_44] + Select Operator [SEL_22] (rows=1 width=530) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_21] (rows=1 width=324) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","compute_bit_vector(VALUE._col3)","min(VALUE._col4)","max(VALUE._col5)","count(VALUE._col6)","compute_bit_vector(VALUE._col7)"] + <-Union 2 [CUSTOM_SIMPLE_EDGE] + <-Map 1 [CONTAINS] + File Output Operator [FS_31] + table:{"name:":"default.x"} + Select Operator [SEL_30] (rows=6 width=91) + Output:["_col0","_col1"] + Select Operator [SEL_28] (rows=2 width=91) + Output:["_col1"] + Filter Operator [FIL_27] (rows=2 width=87) + predicate:(key = '238') + TableScan [TS_26] (rows=500 width=87) + Output:["key"] + Reduce Output Operator [RS_34] + Group By Operator [GBY_33] (rows=1 width=392) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END)","compute_bit_vector(col1, 'hll')","min(col2)","max(col2)","count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END)","compute_bit_vector(col2, 'hll')"] + Select Operator [SEL_32] (rows=6 width=91) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_30] + <-Map 4 [CONTAINS] + File Output Operator [FS_45] + table:{"name:":"default.x"} + Select Operator [SEL_44] (rows=6 width=91) + Output:["_col0","_col1"] + Select Operator [SEL_42] (rows=4 width=87) + Output:["_col1"] + Lateral View Join Operator [LVJ_40] (rows=4 width=239) + Output:["_col5"] + Select Operator [SEL_38] (rows=2 width=431) + Lateral View Forward [LVF_37] (rows=2 width=86) + Filter Operator [FIL_36] (rows=2 width=86) + predicate:(key = '238') + TableScan [TS_35] (rows=25 width=86) + Output:["key"] + Reduce Output Operator [RS_48] + Group By Operator [GBY_47] (rows=1 width=392) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END)","compute_bit_vector(col1, 'hll')","min(col2)","max(col2)","count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END)","compute_bit_vector(col2, 'hll')"] + Select Operator [SEL_46] (rows=6 width=91) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_45] + table:{"name:":"default.x"} + Select Operator [SEL_44] (rows=6 width=91) + Output:["_col0","_col1"] + Select Operator [SEL_42] (rows=4 width=87) + Output:["_col1"] + Lateral View Join Operator [LVJ_40] (rows=4 width=239) + Output:["_col5"] + UDTF Operator [UDTF_41] (rows=2 width=48) + function name:explode + Select Operator [SEL_39] (rows=2 width=48) + Output:["_col0"] + Please refer to the previous Lateral View Forward [LVF_37] + Reduce Output Operator [RS_48] + Group By Operator [GBY_47] (rows=1 width=392) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END)","compute_bit_vector(col1, 'hll')","min(col2)","max(col2)","count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END)","compute_bit_vector(col2, 'hll')"] + Select Operator [SEL_46] (rows=6 width=91) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_44] Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6)