diff --git a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out index fde55a2f49..c47ca87d27 100644 --- a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out +++ b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out @@ -96,16 +96,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9405 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -123,17 +123,21 @@ STAGE PLANS: name: default.src_x2 Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out index ae377077c6..c9f70b86ec 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out @@ -81,29 +81,33 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out index 65582e7c7b..74b1c6c699 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out @@ -77,29 +77,33 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: smallint), _col1 (type: smallint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out index 7242473aff..94b6566d0d 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out @@ -77,29 +77,33 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out index bf289a9fde..5bd0e3cc99 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out @@ -88,17 +88,17 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out index 92f2eaded1..5c4c981ac7 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out @@ -81,29 +81,33 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out index 1adcf67510..a90bb5f4de 100644 --- a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out +++ b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out @@ -96,16 +96,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9405 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -123,17 +123,21 @@ STAGE PLANS: name: default.src_x2 Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator diff --git a/itests/hive-blobstore/src/test/results/clientpositive/explain.q.out b/itests/hive-blobstore/src/test/results/clientpositive/explain.q.out index 1cd55fa9c6..97b9fb78e7 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/explain.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/explain.q.out @@ -88,17 +88,17 @@ STAGE PLANS: outputColumnNames: cnt Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(cnt, 'hll') + aggregations: min(cnt), max(cnt), count(1), count(cnt), compute_bit_vector(cnt, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out index ce071cba37..51b51200fa 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out @@ -110,19 +110,19 @@ STAGE PLANS: outputColumnNames: id Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id, 'hll') + aggregations: min(id), max(id), count(1), count(id), compute_bit_vector(id, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -164,34 +164,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0 - columns.types struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out index f65bf22eda..502fdd2377 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out @@ -118,19 +118,19 @@ STAGE PLANS: outputColumnNames: id Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id, 'hll') + aggregations: min(id), max(id), count(1), count(id), compute_bit_vector(id, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -172,34 +172,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0 - columns.types struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator diff --git a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out index 8bfc66795e..7214dcd8a5 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out @@ -215,29 +215,29 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0 - columns.types struct + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:bigint:bigint:bigint:binary escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -471,29 +471,29 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0 - columns.types struct + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:bigint:bigint:bigint:binary escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 1a6fc4c1cc..44831368be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -468,6 +468,8 @@ system.registerGenericUDAF("context_ngrams", new GenericUDAFContextNGrams()); system.registerGenericUDAF("compute_stats", new GenericUDAFComputeStats()); + system.registerGenericUDF("ndv_compute_bit_vector", GenericUDFNDVComputeBitVector.class); + system.registerGenericUDAF("compute_bit_vector", new GenericUDAFComputeBitVector()); system.registerGenericUDAF("bloom_filter", new GenericUDAFBloomFilter()); system.registerGenericUDAF("approx_distinct", new GenericUDAFApproximateDistinct()); system.registerUDAF("percentile", UDAFPercentile.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index d25cadf7ea..07c2310e1a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -131,7 +131,7 @@ public void insertTableValuesAnalyzePipeline() throws SemanticException { } } String command = ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, Utilities.getColumnNamesFromFieldSchema(tbl.getCols()), conf, partSpec, isPartitionStats, true); + tbl, conf, partSpec, isPartitionStats, true); insertAnalyzePipeline(command, true); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 2787b47b2e..b4bedc5061 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.hive.ql.metadata.HiveUtils.unparseIdentifier; +import com.google.common.base.Preconditions; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -28,7 +29,6 @@ import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.VariableSubstitution; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; @@ -40,7 +40,11 @@ import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsField; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -74,17 +78,6 @@ public ColumnStatsSemanticAnalyzer(QueryState queryState) throws SemanticExcepti super(queryState); } - public static String getQuote(HiveConf conf) { - String qIdSupport = conf.getVar(ConfVars.HIVE_QUOTEDID_SUPPORT); - if ("column".equals(qIdSupport)) { - return "`"; - } else if ("standard".equals(qIdSupport)) { - return "\""; - } else { - return ""; - } - } - private boolean shouldRewrite(ASTNode tree) { boolean rwt = false; if (tree.getChildCount() > 1) { @@ -204,7 +197,7 @@ private static String getColTypeOf(Table tbl, String partKey) throws SemanticExc throw new SemanticException("Unknown partition key : " + partKey); } - private static List getColumnTypes(Table tbl, List colNames) { + protected static List getColumnTypes(Table tbl, List colNames) { List colTypes = new ArrayList(); List cols = tbl.getCols(); List copyColNames = new ArrayList<>(colNames); @@ -227,15 +220,29 @@ private static String getColTypeOf(Table tbl, String partKey) throws SemanticExc return colTypes; } - private String genRewrittenQuery(List colNames, HiveConf conf, Map partSpec, - boolean isPartitionStats) throws SemanticException { - String rewritten = genRewrittenQuery(tbl, colNames, conf, partSpec, isPartitionStats, false); + private String genRewrittenQuery(List colNames, List colTypes, HiveConf conf, + Map partSpec, boolean isPartitionStats) throws SemanticException { + String rewritten = genRewrittenQuery(tbl, colNames, colTypes, conf, partSpec, isPartitionStats, false); isRewritten = true; return rewritten; } - public static String genRewrittenQuery(Table tbl, List colNames, HiveConf conf, Map partSpec, - boolean isPartitionStats, boolean useTableValues) throws SemanticException{ + /** + * Generates a SQL statement that will compute the stats for all columns + * included in the input table. + */ + protected static String genRewrittenQuery(Table tbl, + HiveConf conf, Map partSpec, boolean isPartitionStats, + boolean useTableValues) throws SemanticException { + List colNames = Utilities.getColumnNamesFromFieldSchema(tbl.getCols()); + List colTypes = ColumnStatsSemanticAnalyzer.getColumnTypes(tbl, colNames); + return ColumnStatsSemanticAnalyzer.genRewrittenQuery( + tbl, colNames, colTypes, conf, partSpec, isPartitionStats, useTableValues); + } + + private static String genRewrittenQuery(Table tbl, List colNames, List colTypes, + HiveConf conf, Map partSpec, boolean isPartitionStats, + boolean useTableValues) throws SemanticException { StringBuilder rewrittenQueryBuilder = new StringBuilder("select "); StringBuilder columnNamesBuilder = new StringBuilder(); @@ -246,26 +253,15 @@ public static String genRewrittenQuery(Table tbl, List colNames, HiveCon columnNamesBuilder.append(" , "); columnDummyValuesBuilder.append(" , "); } - String func = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ALGO).toLowerCase(); - rewrittenQueryBuilder.append("compute_stats("); + final String columnName = unparseIdentifier(colNames.get(i), conf); - rewrittenQueryBuilder.append(columnName); - rewrittenQueryBuilder.append(", '" + func + "'"); - if ("fm".equals(func)) { - int numBitVectors = 0; - try { - numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); - } catch (Exception e) { - throw new SemanticException(e.getMessage()); - } - rewrittenQueryBuilder.append(", " + numBitVectors); - } - rewrittenQueryBuilder.append(')'); + final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colTypes.get(i)); + genComputeStats(rewrittenQueryBuilder, conf, i, columnName, typeInfo); columnNamesBuilder.append(unparseIdentifier(columnName, conf)); columnDummyValuesBuilder.append( - "cast(null as " + TypeInfoUtils.getTypeInfoFromTypeString(tbl.getCols().get(i).getType()).toString() + ")"); + "cast(null as " + typeInfo.toString() + ")"); } if (isPartitionStats) { @@ -304,15 +300,197 @@ public static String genRewrittenQuery(Table tbl, List colNames, HiveCon } String rewrittenQuery = rewrittenQueryBuilder.toString(); - rewrittenQuery = new VariableSubstitution(new HiveVariableSource() { - @Override - public Map getHiveVariable() { - return SessionState.get().getHiveVariables(); - } - }).substitute(conf, rewrittenQuery); + rewrittenQuery = new VariableSubstitution( + () -> SessionState.get().getHiveVariables()).substitute(conf, rewrittenQuery); return rewrittenQuery; } + private static void genComputeStats(StringBuilder rewrittenQueryBuilder, HiveConf conf, + int pos, String columnName, TypeInfo typeInfo) throws SemanticException { + Preconditions.checkArgument(typeInfo.getCategory() == Category.PRIMITIVE); + ColumnStatsType columnStatsType = + ColumnStatsType.getColumnStatsType((PrimitiveTypeInfo) typeInfo); + // The first column is always the type + // The rest of columns will depend on the type itself + for (int i = 0; i < columnStatsType.getColumnStats().size(); i++) { + if (i > 0) { + rewrittenQueryBuilder.append(", "); + } + ColumnStatsField columnStatsField = columnStatsType.getColumnStats().get(i); + appendStatsField(rewrittenQueryBuilder, conf, columnStatsField, columnStatsType, + columnName, pos); + } + } + + private static void appendStatsField(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsField columnStatsField, ColumnStatsType columnStatsType, + String columnName, int pos) throws SemanticException { + switch (columnStatsField) { + case COLUMN_STATS_TYPE: + appendColumnType(rewrittenQueryBuilder, conf, columnStatsType, pos); + break; + case COUNT_TRUES: + appendCountTrues(rewrittenQueryBuilder, conf, columnName, pos); + break; + case COUNT_FALSES: + appendCountFalses(rewrittenQueryBuilder, conf, columnName, pos); + break; + case COUNT_NULLS: + appendCountNulls(rewrittenQueryBuilder, conf, columnName, pos); + break; + case MIN: + appendMin(rewrittenQueryBuilder, conf, columnStatsType, columnName, pos); + break; + case MAX: + appendMax(rewrittenQueryBuilder, conf, columnStatsType, columnName, pos); + break; + case NDV: + appendNDV(rewrittenQueryBuilder, conf, columnName, pos); + break; + case BITVECTOR: + appendBitVector(rewrittenQueryBuilder, conf, columnName, pos); + break; + case MAX_LENGTH: + appendMaxLength(rewrittenQueryBuilder, conf, columnName, pos); + break; + case AVG_LENGTH: + appendAvgLength(rewrittenQueryBuilder, conf, columnName, pos); + break; + default: + throw new SemanticException("Not supported field " + columnStatsField); + } + } + + private static void appendColumnType(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsType columnStatsType, int pos) { + rewrittenQueryBuilder.append("'") + .append(columnStatsType.toString()) + .append("' AS ") + .append(unparseIdentifier(ColumnStatsField.COLUMN_STATS_TYPE.getFieldName() + pos, conf)); + } + + private static void appendMin(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsType columnStatsType, String columnName, int pos) { + switch (columnStatsType) { + case LONG: + rewrittenQueryBuilder.append("CAST(min(") + .append(columnName) + .append(") AS bigint) AS "); + break; + case DOUBLE: + rewrittenQueryBuilder.append("CAST(min(") + .append(columnName) + .append(") AS double) AS "); + break; + default: + rewrittenQueryBuilder.append("min(") + .append(columnName) + .append(") AS "); + break; + } + rewrittenQueryBuilder.append( + unparseIdentifier(ColumnStatsField.MIN.getFieldName() + pos, conf)); + } + + private static void appendMax(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsType columnStatsType, String columnName, int pos) { + switch (columnStatsType) { + case LONG: + rewrittenQueryBuilder.append("CAST(max(") + .append(columnName) + .append(") AS bigint) AS "); + break; + case DOUBLE: + rewrittenQueryBuilder.append("CAST(max(") + .append(columnName) + .append(") AS double) AS "); + break; + default: + rewrittenQueryBuilder.append("max(") + .append(columnName) + .append(") AS "); + break; + } + rewrittenQueryBuilder.append( + unparseIdentifier(ColumnStatsField.MAX.getFieldName() + pos, conf)); + } + + private static void appendMaxLength(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(COALESCE(max(LENGTH(") + .append(columnName) + .append(")), 0) AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.MAX_LENGTH.getFieldName() + pos, conf)); + } + + private static void appendAvgLength(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(COALESCE(avg(COALESCE(LENGTH(") + .append(columnName) + .append("), 0)), 0) AS double) AS ") + .append(unparseIdentifier(ColumnStatsField.AVG_LENGTH.getFieldName() + pos, conf)); + } + + private static void appendCountNulls(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(count(1) - count(") + .append(columnName) + .append(") AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.COUNT_NULLS.getFieldName() + pos, conf)); + } + + private static void appendNDV(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) throws SemanticException { + rewrittenQueryBuilder.append("COALESCE(NDV_COMPUTE_BIT_VECTOR("); + appendBitVector(rewrittenQueryBuilder, conf, columnName); + rewrittenQueryBuilder.append("), 0) AS ") + .append(unparseIdentifier(ColumnStatsField.NDV.getFieldName() + pos, conf)); + } + + private static void appendBitVector(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) throws SemanticException { + appendBitVector(rewrittenQueryBuilder, conf, columnName); + rewrittenQueryBuilder.append(" AS ") + .append(unparseIdentifier(ColumnStatsField.BITVECTOR.getFieldName() + pos, conf)); + } + + private static void appendBitVector(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName) throws SemanticException { + String func = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ALGO).toLowerCase(); + rewrittenQueryBuilder.append("compute_bit_vector(") + .append(columnName) + .append(", '") + .append(func) + .append("'"); + if ("fm".equals(func)) { + int numBitVectors; + try { + numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); + } catch (Exception e) { + throw new SemanticException(e.getMessage()); + } + rewrittenQueryBuilder.append(", ") + .append(numBitVectors); + } + rewrittenQueryBuilder.append(")"); + } + + private static void appendCountTrues(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(count(CASE WHEN ") + .append(columnName) + .append(" IS TRUE THEN 1 ELSE null END) AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.COUNT_TRUES.getFieldName() + pos, conf)); + } + + private static void appendCountFalses(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(count(CASE WHEN ") + .append(columnName) + .append(" IS FALSE THEN 1 ELSE null END) AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.COUNT_FALSES.getFieldName() + pos, conf)); + } + private ASTNode genRewrittenTree(String rewrittenQuery) throws SemanticException { // Parse the rewritten query string try { @@ -398,7 +576,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { isTableLevel = true; } colType = getColumnTypes(tbl, colNames); - rewrittenQuery = genRewrittenQuery(colNames, conf, partSpec, isPartitionStats); + rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partSpec, isPartitionStats); rewrittenTree = genRewrittenTree(rewrittenQuery); } else { // Not an analyze table column compute statistics statement - don't do any rewrites @@ -467,7 +645,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) isTableLevel = !isPartitionStats; - rewrittenQuery = genRewrittenQuery(colNames, conf, partSpec, isPartitionStats); + rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partSpec, isPartitionStats); rewrittenTree = genRewrittenTree(rewrittenQuery); return rewrittenTree; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java index 1a339633d4..53c5b1de08 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.stats; +import com.google.common.collect.ImmutableList; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -41,6 +42,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.session.SessionState; @@ -49,11 +51,14 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + public class ColStatsProcessor implements IStatsProcessor { private static transient final Logger LOG = LoggerFactory.getLogger(ColStatsProcessor.class); @@ -87,10 +92,8 @@ public int process(Hive db, Table tbl) throws Exception { return persistColumnStats(db, tbl); } - private List constructColumnStatsFromPackedRows(Table tbl1) throws HiveException, MetaException, IOException { - - Table tbl = tbl1; - + private List constructColumnStatsFromPackedRows(Table tbl) + throws HiveException, MetaException, IOException { String partName = null; List colName = colStatDesc.getColName(); List colType = colStatDesc.getColType(); @@ -103,22 +106,21 @@ public int process(Hive db, Table tbl) throws Exception { throw new HiveException("Unexpected object type encountered while unpacking row"); } - List statsObjs = new ArrayList(); - StructObjectInspector soi = (StructObjectInspector) packedRow.oi; - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(packedRow.o); + final List statsObjs = new ArrayList<>(); + final StructObjectInspector soi = (StructObjectInspector) packedRow.oi; + final List fields = soi.getAllStructFieldRefs(); + final List values = soi.getStructFieldsDataAsList(packedRow.o); - List partColSchema = tbl.getPartCols(); // Partition columns are appended at end, we only care about stats column - int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size(); - assert list != null; - for (int i = 0; i < numOfStatCols; i++) { - StructField structField = fields.get(i); + int pos = 0; + for (int i = 0; i < colName.size(); i++) { String columnName = colName.get(i); String columnType = colType.get(i); - Object values = list.get(i); + PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(columnType); + List columnStatsFields = ColumnStatsType.getColumnStats(typeInfo); try { - ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveStruct(columnName, columnType, structField, values); + ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveColumnStatistics( + columnName, columnType, columnStatsFields, pos, fields, values); statsObjs.add(statObj); } catch (Exception e) { if (isStatsReliable) { @@ -127,15 +129,17 @@ public int process(Hive db, Table tbl) throws Exception { LOG.debug("Because {} is infinite or NaN, we skip stats.", columnName, e); } } + pos += columnStatsFields.size(); } if (!statsObjs.isEmpty()) { - if (!isTblLevel) { - List partVals = new ArrayList(); + List partColSchema = tbl.getPartCols(); + List partVals = new ArrayList<>(); // Iterate over partition columns to figure out partition name - for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) { - Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(list.get(i)); + for (int i = pos; i < pos + partColSchema.size(); i++) { + Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()) + .getPrimitiveJavaObject(values.get(i)); partVals.add(partVal == null ? // could be null for default partition this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString()); } @@ -196,4 +200,148 @@ public int persistColumnStats(Hive db, Table tbl) throws HiveException, MetaExce public void setDpPartSpecs(Collection dpPartSpecs) { } + /** + * Enumeration of column stats fields that can currently + * be computed. Each one has a field name associated. + */ + public enum ColumnStatsField { + COLUMN_STATS_TYPE("columntype"), + COUNT_TRUES("counttrues"), + COUNT_FALSES("countfalses"), + COUNT_NULLS("countnulls"), + MIN("min"), + MAX("max"), + NDV("numdistinctvalues"), + BITVECTOR("ndvbitvector"), + MAX_LENGTH("maxlength"), + AVG_LENGTH("avglength"); + + private final String fieldName; + + ColumnStatsField(String fieldName) { + this.fieldName = fieldName; + } + + public String getFieldName() { + return fieldName; + } + } + + /** + * Enumeration of column stats type. Each Hive primitive type maps into a single + * column stats type, e.g., byte, short, int, and bigint types map into long + * column type. Each column stats type has _n_ column stats fields associated + * with it. + */ + public enum ColumnStatsType { + BOOLEAN( + ImmutableList.of( + ColumnStatsField.COLUMN_STATS_TYPE, + ColumnStatsField.COUNT_TRUES, + ColumnStatsField.COUNT_FALSES, + ColumnStatsField.COUNT_NULLS)), + LONG( + ImmutableList.of( + ColumnStatsField.COLUMN_STATS_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + DOUBLE( + ImmutableList.of( + ColumnStatsField.COLUMN_STATS_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + STRING( + ImmutableList.of( + ColumnStatsField.COLUMN_STATS_TYPE, + ColumnStatsField.MAX_LENGTH, + ColumnStatsField.AVG_LENGTH, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + BINARY( + ImmutableList.of( + ColumnStatsField.COLUMN_STATS_TYPE, + ColumnStatsField.MAX_LENGTH, + ColumnStatsField.AVG_LENGTH, + ColumnStatsField.COUNT_NULLS)), + DECIMAL( + ImmutableList.of( + ColumnStatsField.COLUMN_STATS_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + DATE( + ImmutableList.of( + ColumnStatsField.COLUMN_STATS_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)), + TIMESTAMP( + ImmutableList.of( + ColumnStatsField.COLUMN_STATS_TYPE, + ColumnStatsField.MIN, + ColumnStatsField.MAX, + ColumnStatsField.COUNT_NULLS, + ColumnStatsField.NDV, + ColumnStatsField.BITVECTOR)); + + + private final List columnStats; + + ColumnStatsType(List columnStats) { + this.columnStats = columnStats; + } + + public List getColumnStats() { + return columnStats; + } + + public static ColumnStatsType getColumnStatsType(PrimitiveTypeInfo typeInfo) + throws SemanticException { + switch (typeInfo.getPrimitiveCategory()) { + case BOOLEAN: + return BOOLEAN; + case BYTE: + case SHORT: + case INT: + case LONG: + case TIMESTAMPLOCALTZ: + return LONG; + case FLOAT: + case DOUBLE: + return DOUBLE; + case DECIMAL: + return DECIMAL; + case DATE: + return DATE; + case TIMESTAMP: + return TIMESTAMP; + case STRING: + case CHAR: + case VARCHAR: + return STRING; + case BINARY: + return BINARY; + default: + throw new SemanticException("Not supported type " + + typeInfo.getTypeName() + " for statistics computation"); + } + } + + public static List getColumnStats(PrimitiveTypeInfo typeInfo) + throws SemanticException { + return getColumnStatsType(typeInfo).getColumnStats(); + } + + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java index e6926d3d18..799bc386e5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java @@ -37,12 +37,13 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsField; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; @@ -50,40 +51,46 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -public class ColumnStatisticsObjTranslator { - private static transient final Logger LOG = LoggerFactory - .getLogger(ColumnStatisticsObjTranslator.class); +public class ColumnStatisticsObjTranslator { - public static ColumnStatisticsObj readHiveStruct(String columnName, String columnType, StructField structField, Object values) - throws HiveException - { - // Get the field objectInspector, fieldName and the field object. - ObjectInspector foi = structField.getFieldObjectInspector(); - Object f = values; - String fieldName = structField.getFieldName(); + public static ColumnStatisticsObj readHiveColumnStatistics(String columnName, String columnType, + List columnStatsFields, int start, List fields, + List values) throws HiveException { ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); statsObj.setColName(columnName); statsObj.setColType(columnType); - try { - unpackStructObject(foi, f, fieldName, statsObj); - return statsObj; - } catch (Exception e) { - throw new HiveException("error calculating stats for column:" + structField.getFieldName(), e); + + int end = start + columnStatsFields.size(); + for (int i = start; i < end; i++) { + ObjectInspector foi = fields.get(i).getFieldObjectInspector(); + Object v = values.get(i); + try { + unpackPrimitiveObject(foi, v, columnStatsFields.get(i - start), statsObj); + } catch (Exception e) { + throw new HiveException("Error calculating statistics for column:" + columnName, e); + } } + + return statsObj; } - private static void unpackBooleanStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { + private static void unpackBooleanStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { long v = ((LongObjectInspector) oi).get(o); - if (fName.equals("counttrues")) { + switch (csf) { + case COUNT_TRUES: statsObj.getStatsData().getBooleanStats().setNumTrues(v); - } else if (fName.equals("countfalses")) { + break; + case COUNT_FALSES: statsObj.getStatsData().getBooleanStats().setNumFalses(v); - } else if (fName.equals("countnulls")) { + break; + case COUNT_NULLS: statsObj.getStatsData().getBooleanStats().setNumNulls(v); + break; + default: + throw new RuntimeException("Unsupported column stat for BOOLEAN : " + csf); } } @@ -91,51 +98,67 @@ private static void unpackBooleanStats(ObjectInspector oi, Object o, String fNam static class UnsupportedDoubleException extends Exception { } - private static void unpackDoubleStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDoubleStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDoubleStats().setNumDVs(v); - } else if (fName.equals("max")) { - double d = ((DoubleObjectInspector) oi).get(o); - if (Double.isInfinite(d) || Double.isNaN(d)) { + private static void unpackDoubleStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDoubleStats().setNumNulls(cn); + break; + case MIN: + double min = ((DoubleObjectInspector) oi).get(o); + if (Double.isInfinite(min) || Double.isNaN(min)) { throw new UnsupportedDoubleException(); } - statsObj.getStatsData().getDoubleStats().setHighValue(d); - } else if (fName.equals("min")) { - double d = ((DoubleObjectInspector) oi).get(o); - if (Double.isInfinite(d) || Double.isNaN(d)) { + statsObj.getStatsData().getDoubleStats().setLowValue(min); + break; + case MAX: + double max = ((DoubleObjectInspector) oi).get(o); + if (Double.isInfinite(max) || Double.isNaN(max)) { throw new UnsupportedDoubleException(); } - statsObj.getStatsData().getDoubleStats().setLowValue(d); - } else if (fName.equals("ndvbitvector")) { + statsObj.getStatsData().getDoubleStats().setHighValue(max); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDoubleStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDoubleStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for DOUBLE : " + csf); } } - private static void unpackDecimalStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDecimalStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDecimalStats().setNumDVs(v); - } else if (fName.equals("max")) { - HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(d)); - } else if (fName.equals("min")) { - HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d)); - } else if (fName.equals("ndvbitvector")) { + private static void unpackDecimalStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumNulls(cn); + break; + case MIN: + HiveDecimal min = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(min)); + break; + case MAX: + HiveDecimal max = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(max)); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDecimalStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for DECIMAL : " + csf); } } @@ -143,141 +166,182 @@ private static Decimal convertToThriftDecimal(HiveDecimal d) { return DecimalUtils.getDecimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()); } - private static void unpackLongStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setNumDVs(v); - } else if (fName.equals("max")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setHighValue(v); - } else if (fName.equals("min")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setLowValue(v); - } else if (fName.equals("ndvbitvector")) { + private static void unpackLongStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setNumNulls(cn); + break; + case MIN: + long min = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setLowValue(min); + break; + case MAX: + long max = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setHighValue(max); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getLongStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for LONG : " + csf); } } - private static void unpackStringStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setNumDVs(v); - } else if (fName.equals("avglength")) { - double d = ((DoubleObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setAvgColLen(d); - } else if (fName.equals("maxlength")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setMaxColLen(v); - } else if (fName.equals("ndvbitvector")) { + private static void unpackStringStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setNumNulls(cn); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getStringStats().setBitVectors(buf); - ; + break; + case MAX_LENGTH: + long max = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setMaxColLen(max); + break; + case AVG_LENGTH: + double avg = ((DoubleObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setAvgColLen(avg); + break; + default: + throw new RuntimeException("Unsupported column stat for STRING : " + csf); } } - private static void unpackBinaryStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getBinaryStats().setNumNulls(v); - } else if (fName.equals("avglength")) { - double d = ((DoubleObjectInspector) oi).get(o); - statsObj.getStatsData().getBinaryStats().setAvgColLen(d); - } else if (fName.equals("maxlength")) { + private static void unpackBinaryStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getBinaryStats().setNumNulls(cn); + break; + case AVG_LENGTH: + double avg = ((DoubleObjectInspector) oi).get(o); + statsObj.getStatsData().getBinaryStats().setAvgColLen(avg); + break; + case MAX_LENGTH: long v = ((LongObjectInspector) oi).get(o); statsObj.getStatsData().getBinaryStats().setMaxColLen(v); + break; + default: + throw new RuntimeException("Unsupported column stat for BINARY : " + csf); } } - private static void unpackDateStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDateStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDateStats().setNumDVs(v); - } else if (fName.equals("max")) { - DateWritableV2 v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setHighValue(new Date(v.getDays())); - } else if (fName.equals("min")) { - DateWritableV2 v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays())); - } else if (fName.equals("ndvbitvector")) { + private static void unpackDateStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDateStats().setNumNulls(cn); + break; + case MIN: + DateWritableV2 min = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getDateStats().setLowValue(new Date(min.getDays())); + break; + case MAX: + DateWritableV2 max = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getDateStats().setHighValue(new Date(max.getDays())); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDateStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDateStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for DATE : " + csf); } } - private static void unpackTimestampStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getTimestampStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getTimestampStats().setNumDVs(v); - } else if (fName.equals("max")) { - TimestampWritableV2 v = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getTimestampStats().setHighValue(new Timestamp(v.getSeconds())); - } else if (fName.equals("min")) { - TimestampWritableV2 v = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getTimestampStats().setLowValue(new Timestamp(v.getSeconds())); - } else if (fName.equals("ndvbitvector")) { + private static void unpackTimestampStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getTimestampStats().setNumNulls(cn); + break; + case MIN: + TimestampWritableV2 min = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getTimestampStats().setLowValue(new Timestamp(min.getSeconds())); + break; + case MAX: + TimestampWritableV2 max = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getTimestampStats().setHighValue(new Timestamp(max.getSeconds())); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getTimestampStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getTimestampStats().setBitVectors(buf); + break; + default: + throw new RuntimeException("Unsupported column stat for TIMESTAMP : " + csf); } } - private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { + private static void unpackPrimitiveObject(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { if (o == null) { return; } // First infer the type of object - if (fieldName.equals("columntype")) { + if (csf == ColumnStatsField.COLUMN_STATS_TYPE) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); ColumnStatisticsData statsData = new ColumnStatisticsData(); - if (s.equalsIgnoreCase("long")) { + if (s.equalsIgnoreCase(ColumnStatsType.LONG.toString())) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); statsData.setLongStats(longStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("double")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.DOUBLE.toString())) { DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); statsData.setDoubleStats(doubleStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("string")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.STRING.toString())) { StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); statsData.setStringStats(stringStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("boolean")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.BOOLEAN.toString())) { BooleanColumnStatsData booleanStats = new BooleanColumnStatsData(); statsData.setBooleanStats(booleanStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("binary")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.BINARY.toString())) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); statsData.setBinaryStats(binaryStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("decimal")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.DECIMAL.toString())) { DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); statsData.setDecimalStats(decimalStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("date")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.DATE.toString())) { DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); statsData.setDateStats(dateStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("timestamp")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.TIMESTAMP.toString())) { TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); statsData.setTimestampStats(timestampStats); statsObj.setStatsData(statsData); @@ -285,44 +349,21 @@ private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String f } else { // invoke the right unpack method depending on data type of the column if (statsObj.getStatsData().isSetBooleanStats()) { - unpackBooleanStats(oi, o, fieldName, statsObj); + unpackBooleanStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetLongStats()) { - unpackLongStats(oi, o, fieldName, statsObj); + unpackLongStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetDoubleStats()) { - unpackDoubleStats(oi, o, fieldName, statsObj); + unpackDoubleStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetStringStats()) { - unpackStringStats(oi, o, fieldName, statsObj); + unpackStringStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetBinaryStats()) { - unpackBinaryStats(oi, o, fieldName, statsObj); + unpackBinaryStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetDecimalStats()) { - unpackDecimalStats(oi, o, fieldName, statsObj); + unpackDecimalStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetDateStats()) { - unpackDateStats(oi, o, fieldName, statsObj); + unpackDateStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetTimestampStats()) { - unpackTimestampStats(oi, o, fieldName, statsObj); - } - } - } - - private static void unpackStructObject(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj cStatsObj) throws UnsupportedDoubleException { - if (oi.getCategory() != ObjectInspector.Category.STRUCT) { - throw new RuntimeException("Invalid object datatype : " + oi.getCategory().toString()); - } - - StructObjectInspector soi = (StructObjectInspector) oi; - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(o); - - for (int i = 0; i < fields.size(); i++) { - // Get the field objectInspector, fieldName and the field object. - ObjectInspector foi = fields.get(i).getFieldObjectInspector(); - Object f = (list == null ? null : list.get(i)); - String fieldName = fields.get(i).getFieldName(); - - if (foi.getCategory() == ObjectInspector.Category.PRIMITIVE) { - unpackPrimitiveObject(foi, f, fieldName, cStatsObj); - } else { - unpackStructObject(foi, f, fieldName, cStatsObj); + unpackTimestampStats(oi, o, csf, statsObj); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeBitVector.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeBitVector.java new file mode 100644 index 0000000000..7e91fc7ec9 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeBitVector.java @@ -0,0 +1,559 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.BytesWritable; + +import static org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator; +import static org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator; + +/** + * GenericUDAFComputeBitVector. This UDAF will compute a bit vector using the + * algorithm provided as a parameter. The ndv_compute_bit_vector function can + * be used on top of it to extract an estimate of the ndv from it. + */ +@Description(name = "compute_bit_vector", + value = "_FUNC_(x) - Computes bit vector for NDV computation.") +public class GenericUDAFComputeBitVector extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { + if (parameters.length < 2 ) { + throw new UDFArgumentTypeException(parameters.length - 1, + "Exactly 2 (col + hll) or 3 (col + fm + #bitvectors) arguments are expected."); + } + + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + + ColumnStatsType cst = ColumnStatsType.getColumnStatsType(((PrimitiveTypeInfo) parameters[0])); + switch (cst) { + case LONG: + return new GenericUDAFLongStatsEvaluator(); + case DOUBLE: + return new GenericUDAFDoubleStatsEvaluator(); + case STRING: + return new GenericUDAFStringStatsEvaluator(); + case DECIMAL: + return new GenericUDAFDecimalStatsEvaluator(); + case DATE: + return new GenericUDAFDateStatsEvaluator(); + case TIMESTAMP: + return new GenericUDAFTimestampStatsEvaluator(); + default: + throw new UDFArgumentTypeException(0, + "Type argument " + parameters[0].getTypeName() + " not valid"); + } + } + + public static abstract class GenericUDAFNumericStatsEvaluator + extends GenericUDAFEvaluator { + + protected final static int MAX_BIT_VECTORS = 1024; + + /* Object Inspector corresponding to the input parameter. + */ + protected transient PrimitiveObjectInspector inputOI; + protected transient PrimitiveObjectInspector funcOI; + protected transient PrimitiveObjectInspector numVectorsOI; + + /* Object Inspector corresponding to the bitvector. + */ + protected transient BinaryObjectInspector ndvFieldOI; + + /* Partial aggregation result returned by TerminatePartial. + */ + protected transient BytesWritable partialResult; + + /* Output of final result of the aggregation. + */ + protected transient BytesWritable result; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + + // initialize input + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; + funcOI = (PrimitiveObjectInspector) parameters[1]; + if (parameters.length > 2) { + numVectorsOI = (PrimitiveObjectInspector) parameters[2]; + } + } else { + ndvFieldOI = (BinaryObjectInspector) parameters[0]; + } + + // initialize output + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { + partialResult = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } else { + result = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + NumericStatsAgg myagg = (NumericStatsAgg) agg; + + if (myagg.numDV == null) { + int numVectors = 0; + // func may be null when GBY op is closing. + // see mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainuser_3.q + // original behavior is to create FMSketch + String func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString( + parameters[1], funcOI); + if (parameters.length == 3) { + numVectors = parameters[2] == null ? 0 : PrimitiveObjectInspectorUtils.getInt( + parameters[2], numVectorsOI); + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + " is " + + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + } + } + myagg.initNDVEstimator(func, numVectors); + } + + if (parameters[0] != null) { + myagg.update(parameters[0], inputOI); + } + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial != null) { + NumericStatsAgg myagg = (NumericStatsAgg) agg; + // Merge numDistinctValue Estimators + byte[] buf = ndvFieldOI.getPrimitiveJavaObject(partial); + if (buf != null && buf.length != 0) { + if (myagg.numDV == null) { + myagg.numDV = getNumDistinctValueEstimator(buf); + } else { + myagg.numDV.mergeEstimators(getNumDistinctValueEstimator(buf)); + } + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + return ((NumericStatsAgg) agg).serializePartial(partialResult); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + return ((NumericStatsAgg) agg).serialize(result); + } + + public abstract class NumericStatsAgg extends AbstractAggregationBuffer { + + public NumDistinctValueEstimator numDV; /* Distinct value estimator */ + + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return (numDV == null) ? + lengthFor(model) : numDV.lengthFor(model); + } + + protected void initNDVEstimator(String func, int numBitVectors) { + numDV = getEmptyNumDistinctValueEstimator(func, numBitVectors); + } + + protected abstract void update(Object p, PrimitiveObjectInspector inputOI); + + protected Object serialize(BytesWritable result) { + if (numDV != null) { + byte[] buf = numDV.serialize(); + result.set(buf, 0, buf.length); + } + return result; + } + + protected Object serializePartial(BytesWritable result) { + if (numDV != null) { + // Serialize numDistinctValue Estimator + byte[] buf = numDV.serialize(); + result.set(buf, 0, buf.length); + } + return result; + } + + public void reset() throws HiveException { + numDV = null; + } + }; + } + + /** + * GenericUDAFLongStatsEvaluator. + * + */ + public static class GenericUDAFLongStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class LongStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + long v = PrimitiveObjectInspectorUtils.getLong(p, inputOI); + numDV.addToEstimator(v); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new LongStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFDoubleStatsEvaluator. + */ + public static class GenericUDAFDoubleStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class DoubleStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI); + numDV.addToEstimator(v); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DoubleStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + public static class GenericUDAFDecimalStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class DecimalStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.lengthOfDecimal() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + HiveDecimal v = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI); + numDV.addToEstimator(v); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DecimalStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFDateStatsEvaluator. + */ + public static class GenericUDAFDateStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class DateStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + // DateWritableV2 is mutable, DateStatsAgg needs its own copy + DateWritableV2 v = new DateWritableV2((DateWritableV2) inputOI.getPrimitiveWritableObject(p)); + numDV.addToEstimator(v.getDays()); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DateStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFTimestampStatsEvaluator. + */ + public static class GenericUDAFTimestampStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class TimestampStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + // TimestampWritableV2 is mutable, TimestampStatsAgg needs its own copy + TimestampWritableV2 v = new TimestampWritableV2((TimestampWritableV2) inputOI.getPrimitiveWritableObject(p)); + numDV.addToEstimator(v.getSeconds()); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new TimestampStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFStringStatsEvaluator. + */ + public static class GenericUDAFStringStatsEvaluator extends GenericUDAFEvaluator { + + private final static int MAX_BIT_VECTORS = 1024; + + /* Object Inspector corresponding to the input parameter. + */ + private transient PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector funcOI; + private transient PrimitiveObjectInspector numVectorsOI; + + /* Object Inspector corresponding to the bitvector + */ + private transient BinaryObjectInspector ndvFieldOI; + + /* Partial aggregation result returned by TerminatePartial. + */ + private transient BytesWritable partialResult; + + /* Output of final result of the aggregation + */ + private transient BytesWritable result; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + + // initialize input + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; + funcOI = (PrimitiveObjectInspector) parameters[1]; + if (parameters.length > 2) { + numVectorsOI = (PrimitiveObjectInspector) parameters[2]; + } + } else { + ndvFieldOI = (BinaryObjectInspector) parameters[0]; + } + + // initialize output + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { + partialResult = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } else { + result = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } + } + + @AggregationType(estimable = true) + public static class StringStatsAgg extends AbstractAggregationBuffer { + public NumDistinctValueEstimator numDV; /* Distinct value estimator */ + public boolean firstItem; + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return (numDV == null) ? + lengthFor(model) : numDV.lengthFor(model); } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + StringStatsAgg result = new StringStatsAgg(); + reset(result); + return result; + } + + public void initNDVEstimator(StringStatsAgg aggBuffer, String func, int numBitVectors) { + aggBuffer.numDV = getEmptyNumDistinctValueEstimator(func, numBitVectors); + aggBuffer.numDV.reset(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + StringStatsAgg myagg = (StringStatsAgg) agg; + myagg.firstItem = true; + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + Object p = parameters[0]; + StringStatsAgg myagg = (StringStatsAgg) agg; + + if (myagg.firstItem) { + int numVectors = 0; + String func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString( + parameters[1], funcOI); + if (parameters.length > 2) { + numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[2], numVectorsOI); + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + " is " + + MAX_BIT_VECTORS + " , but was passed " + numVectors + " bit vectors"); + } + } + + initNDVEstimator(myagg, func, numVectors); + myagg.firstItem = false; + } + + String v = PrimitiveObjectInspectorUtils.getString(p, inputOI); + if (v != null) { + // Add string value to NumDistinctValue Estimator + myagg.numDV.addToEstimator(v); + } + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial != null) { + StringStatsAgg myagg = (StringStatsAgg) agg; + + // Merge numDistinctValue Estimators + byte[] buf = ndvFieldOI.getPrimitiveJavaObject(partial); + + if (buf != null && buf.length != 0) { + if (myagg.numDV == null) { + myagg.numDV = getNumDistinctValueEstimator(buf); + } else { + myagg.numDV.mergeEstimators(getNumDistinctValueEstimator(buf)); + } + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + StringStatsAgg myagg = (StringStatsAgg) agg; + // Serialize numDistinctValue Estimator + if (myagg.numDV != null) { + byte[] buf = myagg.numDV.serialize(); + partialResult.set(buf, 0, buf.length); + } + return partialResult; + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + StringStatsAgg myagg = (StringStatsAgg) agg; + if (myagg.numDV != null) { + byte[] buf = myagg.numDV.serialize(); + result.set(buf, 0, buf.length); + } + return result; + } + } + + @InterfaceAudience.LimitedPrivate(value = { "Hive" }) + static int lengthFor(JavaDataModel model) { + int length = model.object(); + // HiveConf hive.stats.ndv.error default produces 16 + length += model.array() * 3; // three array + length += model.primitive1() * 16 * 2; // two int array + length += (model.object() + model.array() + model.primitive1() + model.primitive2()) + * 16; // bitset array + return length; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 4e7c598155..2b5f90e2c5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -61,6 +61,7 @@ */ @Description(name = "compute_stats", value = "_FUNC_(x) - Returns the statistical summary of a set of primitive type values.") +@Deprecated public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver { static final Logger LOG = LoggerFactory.getLogger(GenericUDAFComputeStats.class.getName()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNDVComputeBitVector.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNDVComputeBitVector.java new file mode 100644 index 0000000000..9f9d8eb044 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNDVComputeBitVector.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; + + +/** + * GenericUDFNDVComputeBitVector. The ndv_compute_bit_vector function can be used on top of + * compute_bit_vector aggregate function to extract an estimate of the ndv from it. + */ +@Description(name = "ndv_compute_bit_vector", + value = "_FUNC_(x) - Extracts NDV from bit vector.") +public class GenericUDFNDVComputeBitVector extends GenericUDF { + + protected transient BinaryObjectInspector inputOI; + protected final LongWritable result = new LongWritable(0); + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments[0].getCategory() != Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, + "ndv_compute_bitvector input only takes primitive types, got " + arguments[0].getTypeName()); + } + PrimitiveObjectInspector objectInspector = (PrimitiveObjectInspector) arguments[0]; + if (objectInspector.getPrimitiveCategory() != PrimitiveCategory.BINARY) { + throw new UDFArgumentTypeException(0, + "ndv_compute_bitvector input only takes BINARY type, got " + arguments[0].getTypeName()); + } + inputOI = (BinaryObjectInspector) arguments[0]; + return PrimitiveObjectInspectorFactory.writableLongObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + if (arguments[0] == null) { + return null; + } + Object input = arguments[0].get(); + if (input == null) { + return null; + } + + byte[] buf = inputOI.getPrimitiveJavaObject(input); + if (buf == null || buf.length == 0) { + return null; + } + NumDistinctValueEstimator numDV = + NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(buf); + result.set(numDV.estimateNumDistinctValues()); + return result; + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString("ndv_compute_bit_vector", children, ","); + } +} diff --git a/ql/src/test/results/clientnegative/fileformat_void_input.q.out b/ql/src/test/results/clientnegative/fileformat_void_input.q.out index 59242ea2f4..42eb757007 100644 --- a/ql/src/test/results/clientnegative/fileformat_void_input.q.out +++ b/ql/src/test/results/clientnegative/fileformat_void_input.q.out @@ -10,4 +10,4 @@ POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -FAILED: SemanticException 1:72 Input format must implement InputFormat. Error encountered near token 'dest1' +FAILED: SemanticException 1:705 Input format must implement InputFormat. Error encountered near token 'dest1' diff --git a/ql/src/test/results/clientnegative/masking_mv.q.out b/ql/src/test/results/clientnegative/masking_mv.q.out index 02ff153254..926c5062ce 100644 --- a/ql/src/test/results/clientnegative/masking_mv.q.out +++ b/ql/src/test/results/clientnegative/masking_mv.q.out @@ -62,29 +62,33 @@ STAGE PLANS: outputColumnNames: col1 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out index 5d09134eb8..10badf0b68 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out @@ -188,11 +188,11 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 550 Data size: 2200 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '1' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 550 Data size: 2200 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 1 @@ -205,8 +205,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9 + columns.types string,int,int,bigint,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -265,7 +265,7 @@ STAGE PLANS: Map-reduce partition columns: '1' (type: string) Statistics: Num rows: 550 Data size: 2200 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -278,8 +278,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9 + columns.types string,int,int,bigint,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -288,8 +288,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9 + columns.types string,int,int,bigint,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -298,14 +298,14 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '1' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 275 Data size: 1100 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 275 Data size: 1100 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 @@ -320,8 +320,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out index bbe6b8baed..72905af8ba 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out @@ -407,11 +407,11 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3223 Data size: 610250 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '2' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 3223 Data size: 610250 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 1 @@ -424,8 +424,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9 + columns.types string,int,int,bigint,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -485,7 +485,7 @@ STAGE PLANS: Map-reduce partition columns: '2' (type: string) Statistics: Num rows: 3223 Data size: 610250 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -498,8 +498,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9 + columns.types string,int,int,bigint,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -508,8 +508,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types string,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9 + columns.types string,int,int,bigint,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -518,14 +518,14 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '2' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1611 Data size: 305030 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), '2' (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1611 Data size: 305030 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 @@ -540,8 +540,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out index 5b47efaccd..79d0e15b7c 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out @@ -662,29 +662,33 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(v1), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(v2), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out index 83516bf2f5..04018ed252 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out @@ -268,11 +268,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -284,8 +284,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -421,9 +421,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -435,8 +435,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -445,8 +445,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -455,34 +455,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n9 select a.key, a.value, b.value @@ -693,11 +697,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -709,8 +713,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -846,9 +850,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -860,8 +864,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -870,8 +874,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -880,34 +884,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n9 select a.key, a.value, b.value diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out index c95804c623..a1bd8ac70a 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out @@ -252,11 +252,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -268,8 +268,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -405,9 +405,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -419,8 +419,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -429,8 +429,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -439,34 +439,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n5 select a.key, a.value, b.value @@ -677,11 +681,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -693,8 +697,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -830,9 +834,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -844,8 +848,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -854,8 +858,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -864,34 +868,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n5 select a.key, a.value, b.value diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out index bbff481863..ed4d1f8a05 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out @@ -252,11 +252,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -268,8 +268,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -405,9 +405,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -419,8 +419,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -429,8 +429,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -439,34 +439,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n1 select a.key, a.value, b.value @@ -677,11 +681,11 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 1 compressed: false @@ -693,8 +697,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -830,9 +834,9 @@ STAGE PLANS: null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -844,8 +848,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -854,8 +858,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types int,struct,bigint,bigint,binary,int,struct,bigint,binary,int,struct,bigint,binary escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -864,34 +868,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result_n1 select a.key, a.value, b.value diff --git a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out index b43c6758ed..dbdf1d7ddc 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -283,19 +283,19 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 312 Data size: 178025 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -388,34 +388,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -823,19 +827,19 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 163 Data size: 93968 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -928,34 +932,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index bd2e6a8cbf..309eadc8d2 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -213,19 +213,19 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 146 Data size: 70215 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -272,34 +272,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index cd3576a652..93980ab3af 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -270,19 +270,19 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 163 Data size: 93968 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -329,34 +329,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/join_map_ppr.q.out b/ql/src/test/results/clientpositive/join_map_ppr.q.out index bf57f67346..14d38db8fa 100644 --- a/ql/src/test/results/clientpositive/join_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/join_map_ppr.q.out @@ -143,19 +143,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 1100 Data size: 195800 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -204,34 +204,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -726,19 +730,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 1100 Data size: 104500 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Local Work: Map Reduce Local Work @@ -787,34 +791,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index bc9bdc7116..f851915331 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -84,12 +84,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -97,9 +97,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -184,30 +184,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index a925fc99d7..0902ddc0d1 100644 --- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1881,7 +1881,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Reducer 5 @@ -2753,7 +2753,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Reducer 5 @@ -2761,7 +2761,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Reducer 6 diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out index 8a05edf911..a5b2ad97ff 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: a, b, d Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(d), max(d), count(d), compute_bit_vector(d, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: date), _col10 (type: date), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DATE' (type: string), _col9 (type: date), _col10 (type: date), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -289,33 +293,37 @@ STAGE PLANS: outputColumnNames: a, b, d Statistics: Num rows: 5 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(d), max(d), count(d), compute_bit_vector(d, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: date), _col10 (type: date), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DATE' (type: string), _col9 (type: date), _col10 (type: date), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out index b1a84e0fb8..64cc3f3972 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out @@ -131,31 +131,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 10 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out index 6d9150eb82..841b5b6834 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out @@ -59,37 +59,37 @@ STAGE PLANS: outputColumnNames: a, b, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -285,37 +285,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll'), max(length(d)), avg(COALESCE(length(d),0)), count(d), compute_bit_vector(d, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary), _col14 (type: int), _col15 (type: struct), _col16 (type: bigint), _col17 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col14,0)) (type: bigint), COALESCE(_col15,0) (type: double), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -469,37 +469,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll'), max(length(d)), avg(COALESCE(length(d),0)), count(d), compute_bit_vector(d, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary), _col14 (type: int), _col15 (type: struct), _col16 (type: bigint), _col17 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col14,0)) (type: bigint), COALESCE(_col15,0) (type: double), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out index e92048ccb3..1d699fd7be 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out @@ -83,12 +83,12 @@ STAGE PLANS: outputColumnNames: a, b, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: int) @@ -96,9 +96,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -144,30 +144,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:int + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:int escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -342,12 +342,12 @@ STAGE PLANS: outputColumnNames: a, b, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: int) @@ -355,9 +355,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -403,30 +403,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:int + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:int escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -539,37 +539,37 @@ STAGE PLANS: outputColumnNames: a, b, part Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll') keys: part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_6.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_6.q.out index b674164063..002cab35a2 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_6.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_6.q.out @@ -56,19 +56,19 @@ STAGE PLANS: outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: one (type: string), two (type: string), three (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 500 Data size: 426500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 500 Data size: 426500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -82,18 +82,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 500 Data size: 392500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out index 797ccdd911..57e63514cf 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out @@ -106,35 +106,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), c1 (type: int), c2 (type: string) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), key (type: string), c1 (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), c2 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col6), avg(VALUE._col7), count(VALUE._col8), compute_bit_vector(VALUE._col8, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out index 48d1d92078..a78ab9603b 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out @@ -119,12 +119,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -132,9 +132,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Filter Operator isSamplingPred: false @@ -178,12 +178,12 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '2008-12-31' (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: '2008-12-31' (type: string), _col1 (type: string) @@ -191,9 +191,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -358,30 +358,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -396,30 +396,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '2008-12-31' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out index d3542c947f..886ccd1fab 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out @@ -104,31 +104,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join1.q.out b/ql/src/test/results/clientpositive/llap/auto_join1.q.out index 8c039780de..7c6b713bae 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join1.q.out @@ -94,33 +94,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join14.q.out b/ql/src/test/results/clientpositive/llap/auto_join14.q.out index 708b0ea9cd..31e20846fa 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join14.q.out @@ -100,33 +100,37 @@ STAGE PLANS: outputColumnNames: c1, c2 Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join17.q.out b/ql/src/test/results/clientpositive/llap/auto_join17.q.out index e6500b52a5..30ada20c14 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join17.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join17.q.out @@ -73,16 +73,16 @@ STAGE PLANS: outputColumnNames: key1, value1, key2, value2 Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -111,17 +111,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join19.q.out b/ql/src/test/results/clientpositive/llap/auto_join19.q.out index 93ab531bda..4da42b1381 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join19.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join19.q.out @@ -85,16 +85,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -123,17 +123,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out index abab9ae0ab..31b66a3ab7 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out @@ -85,16 +85,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -123,17 +123,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join2.q.out b/ql/src/test/results/clientpositive/llap/auto_join2.q.out index 9982478128..1766b9f096 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join2.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join2.q.out @@ -83,16 +83,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -143,17 +143,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join26.q.out b/ql/src/test/results/clientpositive/llap/auto_join26.q.out index cbe9af7367..ac90d0481a 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join26.q.out @@ -122,31 +122,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join3.q.out b/ql/src/test/results/clientpositive/llap/auto_join3.q.out index 6f0fcad352..279a4b0174 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join3.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join3.q.out @@ -83,16 +83,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -142,17 +142,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join4.q.out b/ql/src/test/results/clientpositive/llap/auto_join4.q.out index 341c892a7b..19e9df2e8b 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join4.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join4.q.out @@ -95,16 +95,16 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -133,17 +133,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join5.q.out b/ql/src/test/results/clientpositive/llap/auto_join5.q.out index 6419b80bb9..ec4b3084ee 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join5.q.out @@ -117,33 +117,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join6.q.out b/ql/src/test/results/clientpositive/llap/auto_join6.q.out index aac9321390..5f4ece6af5 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join6.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join6.q.out @@ -127,31 +127,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join7.q.out b/ql/src/test/results/clientpositive/llap/auto_join7.q.out index cd3b8c4c9d..0696406a4b 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join7.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join7.q.out @@ -169,31 +169,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6 Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), max(length(c6)), avg(COALESCE(length(c6),0)), count(c6), compute_bit_vector(c6, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: struct), _col23 (type: bigint), _col24 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), max(VALUE._col21), avg(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col21,0)) (type: bigint), COALESCE(_col22,0) (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join8.q.out b/ql/src/test/results/clientpositive/llap/auto_join8.q.out index 3f3dcd252c..07b361774f 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join8.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join8.q.out @@ -98,16 +98,16 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -136,17 +136,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_join9.q.out b/ql/src/test/results/clientpositive/llap/auto_join9.q.out index 06c25ec17d..75738282b9 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join9.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join9.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out index 3ae8a54ae8..7bf8af5094 100644 --- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out @@ -1763,16 +1763,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) File Output Operator compressed: false Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE @@ -1786,47 +1786,55 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2055,16 +2063,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 @@ -2088,17 +2096,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2125,31 +2137,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out index 2d89f0fd2c..018b6a5d6e 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out @@ -132,16 +132,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -159,16 +159,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(k2), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -197,32 +197,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -431,16 +439,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -458,16 +466,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(k2), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -496,32 +504,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -730,16 +746,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -757,16 +773,16 @@ STAGE PLANS: outputColumnNames: k1, k2 Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(k2), compute_bit_vector(k2, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -795,32 +811,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/binary_output_format.q.out b/ql/src/test/results/clientpositive/llap/binary_output_format.q.out index 0b7a75eede..f9c79007f9 100644 --- a/ql/src/test/results/clientpositive/llap/binary_output_format.q.out +++ b/ql/src/test/results/clientpositive/llap/binary_output_format.q.out @@ -124,19 +124,19 @@ STAGE PLANS: outputColumnNames: mydata Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(mydata, 'hll') + aggregations: max(length(mydata)), avg(COALESCE(length(mydata),0)), count(1), count(mydata), compute_bit_vector(mydata, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) auto parallelism: false Path -> Alias: hdfs://### HDFS PATH ### [src] @@ -182,34 +182,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 - directory: hdfs://### HDFS PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - Stats Publishing Key Prefix: hdfs://### HDFS PATH ### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0 - columns.types struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 + directory: hdfs://### HDFS PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + Stats Publishing Key Prefix: hdfs://### HDFS PATH ### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket1.q.out b/ql/src/test/results/clientpositive/llap/bucket1.q.out index 706d391ea9..8afc836d77 100644 --- a/ql/src/test/results/clientpositive/llap/bucket1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket1.q.out @@ -136,53 +136,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket2.q.out b/ql/src/test/results/clientpositive/llap/bucket2.q.out index d40bd0107b..12ac8a3fee 100644 --- a/ql/src/test/results/clientpositive/llap/bucket2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket2.q.out @@ -135,34 +135,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket3.q.out b/ql/src/test/results/clientpositive/llap/bucket3.q.out index 094286123e..e423c62501 100644 --- a/ql/src/test/results/clientpositive/llap/bucket3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket3.q.out @@ -139,12 +139,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -152,39 +152,39 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/bucket4.q.out b/ql/src/test/results/clientpositive/llap/bucket4.q.out index 7246806692..3242b5c4a0 100644 --- a/ql/src/test/results/clientpositive/llap/bucket4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket4.q.out @@ -136,34 +136,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket5.q.out b/ql/src/test/results/clientpositive/llap/bucket5.q.out index 7127f021c4..277f913d65 100644 --- a/ql/src/test/results/clientpositive/llap/bucket5.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket5.q.out @@ -174,53 +174,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 - directory: hdfs://### HDFS PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Stats Publishing Key Prefix: hdfs://### HDFS PATH ### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 + directory: hdfs://### HDFS PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + Stats Publishing Key Prefix: hdfs://### HDFS PATH ### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 4 Execution mode: llap Needs Tagging: false @@ -261,53 +265,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 5 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 - directory: hdfs://### HDFS PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Stats Publishing Key Prefix: hdfs://### HDFS PATH ### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 + directory: hdfs://### HDFS PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + Stats Publishing Key Prefix: hdfs://### HDFS PATH ### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket6.q.out b/ql/src/test/results/clientpositive/llap/bucket6.q.out index ac9fb00c57..b2ff9e8c9a 100644 --- a/ql/src/test/results/clientpositive/llap/bucket6.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket6.q.out @@ -77,31 +77,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket_many.q.out b/ql/src/test/results/clientpositive/llap/bucket_many.q.out index 75f3452e17..b2ff0f7141 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_many.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_many.q.out @@ -136,53 +136,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out b/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out index 716d60a822..8238b87fdc 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out @@ -133,34 +133,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out index 018cb75ef3..458cb554b4 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out @@ -649,53 +649,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1063,53 +1067,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out index bc9afb00eb..9b4a636042 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out @@ -326,53 +326,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -751,53 +755,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1236,53 +1244,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 139 Data size: 21549 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index c2ec2c208c..cf6052b831 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -350,53 +350,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -775,53 +779,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out index b1976d2d4e..3bf00f097e 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out @@ -334,53 +334,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -731,53 +735,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out index 111d1f8b0f..7243feea83 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out @@ -431,35 +431,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out index a4e3ca716a..33f028740d 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out @@ -179,35 +179,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -405,35 +405,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 29 Data size: 7917 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9655172 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -655,35 +655,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 29 Data size: 7917 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9655172 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -909,35 +909,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1145,35 +1145,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1381,35 +1381,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out index b635dde797..cbf1cc3726 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out @@ -228,35 +228,35 @@ STAGE PLANS: outputColumnNames: value, key, ds Statistics: Num rows: 500 Data size: 136500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), min(key), max(key), count(key), compute_bit_vector(key, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out index 77f994fede..a60d24ca1c 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out @@ -168,35 +168,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -420,35 +420,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out index ca137dc2cd..736d049369 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out @@ -168,35 +168,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -411,35 +411,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out index 77c8898adb..8343234a14 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out @@ -170,35 +170,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -414,35 +414,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -658,35 +658,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -837,35 +837,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1022,35 +1022,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1284,35 +1284,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1556,35 +1556,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out index b5968b2d03..7ee12f2570 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out @@ -157,35 +157,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 3 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -385,35 +385,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 3 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -619,35 +619,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 3 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out index 11d17af1e0..aa00fa8517 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out @@ -168,35 +168,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -406,35 +406,35 @@ STAGE PLANS: outputColumnNames: key, key2, value, ds Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.9285714 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 645 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 577 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 879 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out b/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out index 27f1c15a54..05d27c658e 100644 --- a/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out +++ b/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3544 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3544 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/cast1.q.out b/ql/src/test/results/clientpositive/llap/cast1.q.out index 1e2217c75b..587c03c21b 100644 --- a/ql/src/test/results/clientpositive/llap/cast1.q.out +++ b/ql/src/test/results/clientpositive/llap/cast1.q.out @@ -56,33 +56,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7 Statistics: Num rows: 250 Data size: 31000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), max(length(c6)), avg(COALESCE(length(c6),0)), count(c6), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(c7), compute_bit_vector(c7, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28 + Statistics: Num rows: 1 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Statistics: Num rows: 1 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: struct), _col23 (type: bigint), _col24 (type: binary), _col25 (type: int), _col26 (type: int), _col27 (type: bigint), _col28 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), max(VALUE._col21), avg(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24), min(VALUE._col25), max(VALUE._col26), count(VALUE._col27), compute_bit_vector(VALUE._col28) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28 + Statistics: Num rows: 1 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col21,0)) (type: bigint), COALESCE(_col22,0) (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'LONG' (type: string), UDFToLong(_col25) (type: bigint), UDFToLong(_col26) (type: bigint), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out index d28a10406d..f362733658 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out @@ -73,16 +73,16 @@ STAGE PLANS: outputColumnNames: key1, value1, key2, value2 Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -111,17 +111,17 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out index 462186a104..e820953e63 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out @@ -88,31 +88,31 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -258,31 +258,31 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out index f134aee107..97955db577 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -96,17 +96,17 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(c6), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(c7), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(c8), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(c9), compute_bit_vector(c9, 'hll'), min(c10), max(c10), count(c10), compute_bit_vector(c10, 'hll'), min(c11), max(c11), count(c11), compute_bit_vector(c11, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'DOUBLE' (type: string), _col17 (type: double), _col18 (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'DOUBLE' (type: string), _col29 (type: double), _col30 (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'DOUBLE' (type: string), _col33 (type: double), _col34 (type: double), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary), 'DOUBLE' (type: string), _col37 (type: double), _col38 (type: double), (_col2 - _col39) (type: bigint), COALESCE(ndv_compute_bit_vector(_col40),0) (type: bigint), _col40 (type: binary), 'DOUBLE' (type: string), _col41 (type: double), _col42 (type: double), (_col2 - _col43) (type: bigint), COALESCE(ndv_compute_bit_vector(_col44),0) (type: bigint), _col44 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65 + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/clusterctas.q.out b/ql/src/test/results/clientpositive/llap/clusterctas.q.out index 40ceee215f..ae810b636d 100644 --- a/ql/src/test/results/clientpositive/llap/clusterctas.q.out +++ b/ql/src/test/results/clientpositive/llap/clusterctas.q.out @@ -67,31 +67,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out b/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out index bea9396a5a..73c0ec967c 100644 --- a/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out +++ b/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out @@ -90,22 +90,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out index fb4480863a..92bf73c844 100644 --- a/ql/src/test/results/clientpositive/llap/column_table_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out @@ -86,19 +86,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -144,34 +144,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -348,12 +352,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 196 Data size: 257552 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 196 Data size: 313792 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 196 Data size: 233824 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -361,9 +365,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 196 Data size: 313792 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 196 Data size: 233824 Basic stats: PARTIAL Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -452,30 +456,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 98 Data size: 115440 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 98 Data size: 115440 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 98 Data size: 115440 Basic stats: PARTIAL Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -755,12 +759,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 196 Data size: 257552 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 196 Data size: 313792 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 196 Data size: 233824 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -768,9 +772,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 196 Data size: 313792 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 196 Data size: 233824 Basic stats: PARTIAL Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -859,30 +863,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 98 Data size: 115440 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 98 Data size: 115440 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 98 Data size: 155424 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 98 Data size: 115440 Basic stats: PARTIAL Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -1159,12 +1163,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 117 Data size: 121232 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), '11' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 117 Data size: 166072 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 117 Data size: 118336 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), '11' (type: string) @@ -1172,9 +1176,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), '11' (type: string) - Statistics: Num rows: 117 Data size: 166072 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 117 Data size: 118336 Basic stats: PARTIAL Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -1225,30 +1229,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), '11' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 58 Data size: 81584 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 58 Data size: 57920 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 58 Data size: 81584 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 58 Data size: 57920 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 58 Data size: 81584 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 58 Data size: 57920 Basic stats: PARTIAL Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out index dc80b7ccad..6e7af4b51a 100644 --- a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out +++ b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out @@ -89,19 +89,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: all inputs @@ -147,34 +147,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -349,12 +353,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -362,9 +366,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: all inputs @@ -447,30 +451,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 900 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 900 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -745,12 +749,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 1 Data size: 354 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), '11' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 742 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), '11' (type: string) @@ -758,9 +762,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), '11' (type: string) - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 742 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: all inputs @@ -808,30 +812,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), '11' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 606 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 802 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 802 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out index 8152a824f1..918133859b 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out @@ -69,11 +69,11 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll') keys: 2000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: 2000.0D (type: double) @@ -81,21 +81,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 2000.0D (type: double) Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: 2000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), 2000.0D (type: double) - outputColumnNames: _col0, _col1 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 2000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -154,11 +154,11 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll') keys: 2000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 @@ -169,7 +169,7 @@ STAGE PLANS: Map-reduce partition columns: 2000.0D (type: double) Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -221,14 +221,14 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: 2000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), 2000.0D (type: double) - outputColumnNames: _col0, _col1 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 2000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 2 @@ -243,8 +243,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1 - columns.types struct:double + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:bigint:bigint:bigint:bigint:binary:double escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -356,11 +356,11 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll') keys: 4000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: 4000.0D (type: double) @@ -368,21 +368,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 4000.0D (type: double) Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: 4000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), 4000.0D (type: double) - outputColumnNames: _col0, _col1 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 4000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -441,11 +441,11 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll') keys: 4000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 @@ -456,7 +456,7 @@ STAGE PLANS: Map-reduce partition columns: 4000.0D (type: double) Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -508,14 +508,14 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: 4000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), 4000.0D (type: double) - outputColumnNames: _col0, _col1 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 4000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE File Output Operator bucketingVersion: 2 @@ -530,8 +530,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1 - columns.types struct:double + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:bigint:bigint:bigint:bigint:binary:double escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -605,37 +605,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll'), max(length(employeename)), avg(COALESCE(length(employeename),0)), count(employeename), compute_bit_vector(employeename, 'hll') keys: 2000.0D (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: 2000.0D (type: double) null sort order: z sort order: + Map-reduce partition columns: 2000.0D (type: double) - Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: 2000.0D (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), 2000.0D (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 2000.0D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 538 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 538 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -741,37 +741,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename, employeesalary Statistics: Num rows: 26 Data size: 2596 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll'), max(length(employeename)), avg(COALESCE(length(employeename),0)), count(employeename), compute_bit_vector(employeename, 'hll') keys: employeesalary (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1744 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 816 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 2 Data size: 1744 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 2 Data size: 816 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -881,33 +881,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename Statistics: Num rows: 26 Data size: 2300 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll'), max(length(employeename)), avg(COALESCE(length(employeename),0)), count(employeename), compute_bit_vector(employeename, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out index 9ddb93654e..321cd200af 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out @@ -105,37 +105,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename, country Statistics: Num rows: 1 Data size: 1012 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeename, 'hll'), compute_stats(employeeid, 'hll') + aggregations: max(length(employeename)), avg(COALESCE(length(employeename),0)), count(1), count(employeename), compute_bit_vector(employeename, 'hll'), min(employeeid), max(employeeid), count(employeeid), compute_bit_vector(employeeid, 'hll') keys: 4000.0D (type: double), country (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1236 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 772 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: 4000.0D (type: double), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: 4000.0D (type: double), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1236 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 772 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: 4000.0D (type: double), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 772 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), 4000.0D (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 4000.0D (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 772 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 772 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -263,37 +263,37 @@ STAGE PLANS: outputColumnNames: employeeid, country Statistics: Num rows: 3 Data size: 2254 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll') keys: 2000.0D (type: double), country (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 1836 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 3 Data size: 1068 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: 2000.0D (type: double), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: 2000.0D (type: double), _col1 (type: string) - Statistics: Num rows: 3 Data size: 1836 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct) + Statistics: Num rows: 3 Data size: 1068 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: 2000.0D (type: double), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 356 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), 2000.0D (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 2000.0D (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 356 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 356 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -419,37 +419,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeesalary, country Statistics: Num rows: 31 Data size: 6072 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll') keys: employeesalary (type: double), country (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 31 Data size: 19216 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 31 Data size: 11280 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 31 Data size: 19216 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct) + Statistics: Num rows: 31 Data size: 11280 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 15 Data size: 5456 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col0 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 15 Data size: 5456 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 15 Data size: 5456 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -573,37 +573,37 @@ STAGE PLANS: outputColumnNames: employeeid, employeename, employeesalary, country Statistics: Num rows: 54 Data size: 15386 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + aggregations: min(employeeid), max(employeeid), count(1), count(employeeid), compute_bit_vector(employeeid, 'hll'), max(length(employeename)), avg(COALESCE(length(employeename),0)), count(employeename), compute_bit_vector(employeename, 'hll') keys: employeesalary (type: double), country (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 8448 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 8 Data size: 4736 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 8 Data size: 8448 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 8 Data size: 4736 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 8 Data size: 4192 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 8 Data size: 5776 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 5776 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out index 8ac436aeb7..49405d8fac 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out @@ -42,33 +42,37 @@ STAGE PLANS: outputColumnNames: user id, user name Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(user id, 'hll'), compute_stats(user name, 'hll') + aggregations: min(user id), max(user id), count(1), count(user id), compute_bit_vector(user id, 'hll'), max(length(user name)), avg(COALESCE(length(user name),0)), count(user name), compute_bit_vector(user name, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -120,33 +124,37 @@ STAGE PLANS: outputColumnNames: user id Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(user id, 'hll') + aggregations: min(user id), max(user id), count(1), count(user id), compute_bit_vector(user id, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out index f361fda252..cc660f9435 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out @@ -72,33 +72,37 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: float), _col10 (type: float), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col9) (type: double), UDFToDouble(_col10) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -144,19 +148,19 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: float), _col10 (type: float), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -204,34 +208,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col9) (type: double), UDFToDouble(_col10) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:double:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -287,33 +295,37 @@ STAGE PLANS: outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), max(length(desturl)), avg(COALESCE(length(desturl),0)), count(desturl), compute_bit_vector(desturl, 'hll'), max(length(visitdate)), avg(COALESCE(length(visitdate),0)), count(visitdate), compute_bit_vector(visitdate, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll'), max(length(useragent)), avg(COALESCE(length(useragent),0)), count(useragent), compute_bit_vector(useragent, 'hll'), max(length(ccode)), avg(COALESCE(length(ccode),0)), count(ccode), compute_bit_vector(ccode, 'hll'), max(length(lcode)), avg(COALESCE(length(lcode),0)), count(lcode), compute_bit_vector(lcode, 'hll'), max(length(skeyword)), avg(COALESCE(length(skeyword),0)), count(skeyword), compute_bit_vector(skeyword, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: float), _col14 (type: float), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: struct), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: struct), _col23 (type: bigint), _col24 (type: binary), _col25 (type: int), _col26 (type: struct), _col27 (type: bigint), _col28 (type: binary), _col29 (type: int), _col30 (type: struct), _col31 (type: bigint), _col32 (type: binary), _col33 (type: int), _col34 (type: int), _col35 (type: bigint), _col36 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), max(VALUE._col17), avg(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), max(VALUE._col21), avg(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24), max(VALUE._col25), avg(VALUE._col26), count(VALUE._col27), compute_bit_vector(VALUE._col28), max(VALUE._col29), avg(VALUE._col30), count(VALUE._col31), compute_bit_vector(VALUE._col32), min(VALUE._col33), max(VALUE._col34), count(VALUE._col35), compute_bit_vector(VALUE._col36) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col13) (type: double), UDFToDouble(_col14) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col17,0)) (type: bigint), COALESCE(_col18,0) (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col21,0)) (type: bigint), COALESCE(_col22,0) (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col25,0)) (type: bigint), COALESCE(_col26,0) (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col29,0)) (type: bigint), COALESCE(_col30,0) (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'LONG' (type: string), UDFToLong(_col33) (type: bigint), UDFToLong(_col34) (type: bigint), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2392 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2392 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -444,33 +456,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, e Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll'), count(CASE WHEN (d is true) THEN (1) ELSE (null) END), count(CASE WHEN (d is false) THEN (1) ELSE (null) END), count(d), max(length(e)), avg(COALESCE(length(e),0)), count(e) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: int), _col17 (type: struct), _col18 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), count(VALUE._col13), count(VALUE._col14), count(VALUE._col15), max(VALUE._col16), avg(VALUE._col17), count(VALUE._col18) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'BOOLEAN' (type: string), _col13 (type: bigint), _col14 (type: bigint), (_col2 - _col15) (type: bigint), 'BINARY' (type: string), UDFToLong(COALESCE(_col16,0)) (type: bigint), COALESCE(_col17,0) (type: double), (_col2 - _col18) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -607,33 +623,37 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: float), _col10 (type: float), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col9) (type: double), UDFToDouble(_col10) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -679,19 +699,19 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: float), _col10 (type: float), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -739,34 +759,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col9) (type: double), UDFToDouble(_col10) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:double:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -822,33 +846,37 @@ STAGE PLANS: outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), max(length(desturl)), avg(COALESCE(length(desturl),0)), count(desturl), compute_bit_vector(desturl, 'hll'), max(length(visitdate)), avg(COALESCE(length(visitdate),0)), count(visitdate), compute_bit_vector(visitdate, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll'), max(length(useragent)), avg(COALESCE(length(useragent),0)), count(useragent), compute_bit_vector(useragent, 'hll'), max(length(ccode)), avg(COALESCE(length(ccode),0)), count(ccode), compute_bit_vector(ccode, 'hll'), max(length(lcode)), avg(COALESCE(length(lcode),0)), count(lcode), compute_bit_vector(lcode, 'hll'), max(length(skeyword)), avg(COALESCE(length(skeyword),0)), count(skeyword), compute_bit_vector(skeyword, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: float), _col14 (type: float), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: struct), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: struct), _col23 (type: bigint), _col24 (type: binary), _col25 (type: int), _col26 (type: struct), _col27 (type: bigint), _col28 (type: binary), _col29 (type: int), _col30 (type: struct), _col31 (type: bigint), _col32 (type: binary), _col33 (type: int), _col34 (type: int), _col35 (type: bigint), _col36 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), max(VALUE._col17), avg(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), max(VALUE._col21), avg(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24), max(VALUE._col25), avg(VALUE._col26), count(VALUE._col27), compute_bit_vector(VALUE._col28), max(VALUE._col29), avg(VALUE._col30), count(VALUE._col31), compute_bit_vector(VALUE._col32), min(VALUE._col33), max(VALUE._col34), count(VALUE._col35), compute_bit_vector(VALUE._col36) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col13) (type: double), UDFToDouble(_col14) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col17,0)) (type: bigint), COALESCE(_col18,0) (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col21,0)) (type: bigint), COALESCE(_col22,0) (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col25,0)) (type: bigint), COALESCE(_col26,0) (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col29,0)) (type: bigint), COALESCE(_col30,0) (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'LONG' (type: string), UDFToLong(_col33) (type: bigint), UDFToLong(_col34) (type: bigint), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2392 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2392 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out b/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out index ab5cdf0cc8..228740a231 100644 --- a/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out +++ b/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out @@ -78,33 +78,37 @@ STAGE PLANS: outputColumnNames: fl_date Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(fl_date, 'hll') + aggregations: min(fl_date), max(fl_date), count(1), count(fl_date), compute_bit_vector(fl_date, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'DATE' (type: string), _col0 (type: date), _col1 (type: date), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out b/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out index aaf7d89d86..2ea3aa0043 100644 --- a/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out +++ b/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out @@ -64,37 +64,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '2008-04-08' (type: string), '11' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: '2008-04-08' (type: string), '11' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '11' (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '2008-04-08' (type: string), '11' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/constprog_dp.q.out b/ql/src/test/results/clientpositive/llap/constprog_dp.q.out index 1eb1001fcb..ac01f4aeb3 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_dp.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_dp.q.out @@ -58,37 +58,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 626 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 626 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/constprog_type.q.out b/ql/src/test/results/clientpositive/llap/constprog_type.q.out index 59439a4133..55228aa5f6 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_type.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_type.q.out @@ -57,33 +57,37 @@ STAGE PLANS: outputColumnNames: d, t Statistics: Num rows: 500 Data size: 48000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(d, 'hll'), compute_stats(t, 'hll') + aggregations: min(d), max(d), count(1), count(d), compute_bit_vector(d, 'hll'), min(t), max(t), count(t), compute_bit_vector(t, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DATE' (type: string), _col0 (type: date), _col1 (type: date), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'TIMESTAMP' (type: string), _col5 (type: timestamp), _col6 (type: timestamp), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 693 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 693 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/cp_sel.q.out b/ql/src/test/results/clientpositive/llap/cp_sel.q.out index 4fb741011b..4f337d09f7 100644 --- a/ql/src/test/results/clientpositive/llap/cp_sel.q.out +++ b/ql/src/test/results/clientpositive/llap/cp_sel.q.out @@ -165,35 +165,35 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 514 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/ctas.q.out b/ql/src/test/results/clientpositive/llap/ctas.q.out index 275bebd3b9..78657b277a 100644 --- a/ql/src/test/results/clientpositive/llap/ctas.q.out +++ b/ql/src/test/results/clientpositive/llap/ctas.q.out @@ -109,31 +109,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -320,31 +324,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -531,31 +539,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -806,31 +818,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1019,31 +1035,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/ctas_colname.q.out b/ql/src/test/results/clientpositive/llap/ctas_colname.q.out index f897f06669..32041d20ac 100644 --- a/ql/src/test/results/clientpositive/llap/ctas_colname.q.out +++ b/ql/src/test/results/clientpositive/llap/ctas_colname.q.out @@ -69,31 +69,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4 Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll'), max(length(col4)), avg(COALESCE(length(col4),0)), count(col4), compute_bit_vector(col4, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -285,31 +289,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -524,31 +532,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -701,33 +713,37 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -911,31 +927,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1403,31 +1423,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1598,31 +1622,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out b/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out index dc9096ee00..df6690a024 100644 --- a/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out +++ b/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out @@ -61,33 +61,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out index 946f0c4128..e0eee4664f 100644 --- a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out +++ b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out @@ -135,34 +135,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out index b5fbb21d6e..3895392da7 100644 --- a/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out @@ -97,33 +97,37 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: float), _col10 (type: float), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col9) (type: double), UDFToDouble(_col10) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -169,19 +173,19 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: float), _col10 (type: float), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -229,34 +233,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col9) (type: double), UDFToDouble(_col10) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:double:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -408,33 +416,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, e Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll'), count(CASE WHEN (d is true) THEN (1) ELSE (null) END), count(CASE WHEN (d is false) THEN (1) ELSE (null) END), count(d), max(length(e)), avg(COALESCE(length(e),0)), count(e) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: int), _col17 (type: struct), _col18 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), count(VALUE._col13), count(VALUE._col14), count(VALUE._col15), max(VALUE._col16), avg(VALUE._col17), count(VALUE._col18) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'BOOLEAN' (type: string), _col13 (type: bigint), _col14 (type: bigint), (_col2 - _col15) (type: bigint), 'BINARY' (type: string), UDFToLong(COALESCE(_col16,0)) (type: bigint), COALESCE(_col17,0) (type: double), (_col2 - _col18) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 07f0fcdc90..376ccef101 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -116,31 +116,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index 0b5808269f..f7b44394d5 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -232,31 +232,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -507,31 +511,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -805,31 +813,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out b/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out index f571961a97..088cf0e38c 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out @@ -60,37 +60,37 @@ STAGE PLANS: outputColumnNames: i, static_part, dyn_part Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll') keys: static_part (type: int), dyn_part (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out index d9dad5c879..a408b7c6ea 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out @@ -207,19 +207,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint), _col0 (type: smallint) null sort order: aa @@ -231,18 +231,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -349,19 +349,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -373,18 +373,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -717,19 +717,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint), _col0 (type: smallint) null sort order: aa @@ -741,18 +741,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -859,19 +859,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -883,18 +883,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1620,35 +1620,35 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1734,19 +1734,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint), _col1 (type: int) null sort order: aa @@ -1758,18 +1758,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1883,19 +1883,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 1787 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 747 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 1 Data size: 1787 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 747 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -1907,18 +1907,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 747 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2033,35 +2033,35 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 5 Data size: 555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.6 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 2 Data size: 1494 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 2 Data size: 1494 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 2 Data size: 1494 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 2 Data size: 2298 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 2298 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2153,19 +2153,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 5 Data size: 555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.6 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 2 Data size: 1494 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 2 Data size: 1494 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Select Operator expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -2182,18 +2182,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 2 Data size: 1494 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 2 Data size: 2298 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 2298 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2591,35 +2591,35 @@ STAGE PLANS: outputColumnNames: si, i, b, f, t Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: t (type: tinyint) minReductionHashAggr: 0.5454545 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 8500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3300 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 5 Data size: 8500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 5 Data size: 3300 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: smallint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 8820 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 5 Data size: 3300 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 8820 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 5 Data size: 5310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 8820 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5310 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index 77933b3079..5789d488cf 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -148,11 +148,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -160,7 +160,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -174,14 +174,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -292,11 +292,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -304,7 +304,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -316,14 +316,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -644,11 +644,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -656,7 +656,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -670,14 +670,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -788,11 +788,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -800,7 +800,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -812,14 +812,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1549,11 +1549,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -1561,19 +1561,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1663,11 +1663,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -1675,7 +1675,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint), _col1 (type: int) null sort order: aa @@ -1687,14 +1687,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1812,11 +1812,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -1824,7 +1824,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -1836,14 +1836,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1962,11 +1962,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -1974,19 +1974,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2082,11 +2082,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) @@ -2094,7 +2094,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Select Operator expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -2111,14 +2111,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2520,11 +2520,11 @@ STAGE PLANS: outputColumnNames: si, i, b, f, t Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: t (type: tinyint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) @@ -2532,19 +2532,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: smallint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary), _col14 (type: float), _col15 (type: float), _col16 (type: bigint), _col17 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col14) (type: double), UDFToDouble(_col15) (type: double), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2993,11 +2993,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3005,7 +3005,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary), _col12 (type: float), _col13 (type: float), _col14 (type: bigint), _col15 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3019,14 +3019,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col12) (type: double), UDFToDouble(_col13) (type: double), (_col5 - _col14) (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3116,11 +3116,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3128,7 +3128,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary), _col12 (type: float), _col13 (type: float), _col14 (type: bigint), _col15 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3142,14 +3142,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col12) (type: double), UDFToDouble(_col13) (type: double), (_col5 - _col14) (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3239,11 +3239,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3251,7 +3251,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary), _col12 (type: float), _col13 (type: float), _col14 (type: bigint), _col15 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3265,14 +3265,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col12) (type: double), UDFToDouble(_col13) (type: double), (_col5 - _col14) (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3362,11 +3362,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3374,7 +3374,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary), _col12 (type: float), _col13 (type: float), _col14 (type: bigint), _col15 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3388,14 +3388,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col12) (type: double), UDFToDouble(_col13) (type: double), (_col5 - _col14) (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3485,11 +3485,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3497,7 +3497,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary), _col12 (type: float), _col13 (type: float), _col14 (type: bigint), _col15 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3511,14 +3511,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col12) (type: double), UDFToDouble(_col13) (type: double), (_col5 - _col14) (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3608,11 +3608,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3620,7 +3620,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary), _col12 (type: float), _col13 (type: float), _col14 (type: bigint), _col15 (type: binary) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) null sort order: aaa @@ -3634,14 +3634,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col12) (type: double), UDFToDouble(_col13) (type: double), (_col5 - _col14) (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3738,11 +3738,11 @@ STAGE PLANS: outputColumnNames: si, b, f, s, t, i Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: s (type: string), t (type: tinyint), i (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) @@ -3750,21 +3750,21 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col3 (type: smallint), _col4 (type: smallint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary), _col12 (type: float), _col13 (type: float), _col14 (type: bigint), _col15 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col12) (type: double), UDFToDouble(_col13) (type: double), (_col5 - _col14) (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -4231,19 +4231,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 804 Data size: 89236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.840796 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 129 Data size: 96363 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 129 Data size: 96363 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -4257,18 +4257,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 128 Data size: 95616 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 128 Data size: 147072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 147072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4375,19 +4375,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -4399,18 +4399,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4502,37 +4502,37 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 804 Data size: 89236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.840796 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 129 Data size: 96363 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 129 Data size: 96363 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 128 Data size: 95616 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 128 Data size: 147072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 147072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4624,19 +4624,19 @@ STAGE PLANS: outputColumnNames: si, i, b, f, ds, t Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + aggregations: min(si), max(si), count(1), count(si), compute_bit_vector(si, 'hll'), min(i), max(i), count(i), compute_bit_vector(i, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(f), max(f), count(f), compute_bit_vector(f, 'hll') keys: ds (type: string), t (type: tinyint) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: tinyint) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) - Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: smallint), _col3 (type: smallint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: int), _col9 (type: bigint), _col10 (type: binary), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: binary), _col15 (type: float), _col16 (type: float), _col17 (type: bigint), _col18 (type: binary) Reduce Output Operator key expressions: _col4 (type: tinyint) null sort order: a @@ -4648,18 +4648,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 5 Data size: 3735 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'LONG' (type: string), UDFToLong(_col7) (type: bigint), UDFToLong(_col8) (type: bigint), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'LONG' (type: string), _col11 (type: bigint), _col12 (type: bigint), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col15) (type: double), UDFToDouble(_col16) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 5745 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4789,19 +4789,19 @@ STAGE PLANS: outputColumnNames: i, s Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll') keys: s (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 262 Data size: 69430 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 262 Data size: 69430 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -4818,19 +4818,19 @@ STAGE PLANS: outputColumnNames: i, s Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll') keys: s (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 262 Data size: 69430 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 262 Data size: 69430 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -4844,18 +4844,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 262 Data size: 69430 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 262 Data size: 94582 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 94582 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4879,18 +4879,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 262 Data size: 69430 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 262 Data size: 94582 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 94582 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out index 1fa5c46b4f..4d56b3552c 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out @@ -129,14 +129,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -397,14 +397,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -679,14 +679,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -943,14 +943,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1268,18 +1268,18 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1532,18 +1532,18 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1838,35 +1838,35 @@ STAGE PLANS: outputColumnNames: k1, k2, day Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll') keys: day (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2017,35 +2017,35 @@ STAGE PLANS: outputColumnNames: k1, k2, day Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll') keys: day (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index d4d3c21572..3de8f0a4bd 100644 --- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -61,33 +61,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -185,33 +189,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -292,33 +300,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -909,33 +921,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1524,33 +1540,37 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 250 Data size: 66250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2174,16 +2194,16 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20) and enforce_constraint(value is not null)) (type: boolean) Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE @@ -2204,48 +2224,56 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: max(length(i)), avg(COALESCE(length(i),0)), count(1), count(i), compute_bit_vector(i, 'hll'), max(length(j)), avg(COALESCE(length(j),0)), count(j), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.962963 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2362,16 +2390,16 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20) and enforce_constraint(value is not null)) (type: boolean) Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE @@ -2392,48 +2420,56 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: max(length(i)), avg(COALESCE(length(i),0)), count(1), count(i), compute_bit_vector(i, 'hll'), max(length(j)), avg(COALESCE(length(j),0)), count(j), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.962963 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2585,31 +2621,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(de), max(de), count(de), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(vc), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2729,31 +2769,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 250 Data size: 82000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(de), max(de), count(de), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(vc), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2877,31 +2921,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(de), max(de), count(de), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(vc), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3020,31 +3068,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(de), max(de), count(de), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(vc), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3189,31 +3241,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(de), max(de), count(de), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(vc), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3308,31 +3364,35 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 250 Data size: 82000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(de), max(de), count(de), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(vc), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3626,16 +3686,16 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: max(length(i)), avg(COALESCE(length(i),0)), count(1), count(i), compute_bit_vector(i, 'hll'), max(length(j)), avg(COALESCE(length(j),0)), count(j), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.962963 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3659,46 +3719,54 @@ STAGE PLANS: outputColumnNames: i, de, vc Statistics: Num rows: 83 Data size: 27224 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(de, 'hll'), compute_stats(vc, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(de), max(de), count(de), compute_bit_vector(de, 'hll'), max(length(vc)), avg(COALESCE(length(vc),0)), count(vc), compute_bit_vector(vc, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(5,2)), _col6 (type: decimal(5,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -3805,37 +3873,37 @@ STAGE PLANS: outputColumnNames: a, b, c, p1, p2 Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll') keys: p1 (type: string), p2 (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1 Data size: 797 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 797 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary), _col11 (type: int), _col12 (type: struct), _col13 (type: bigint), _col14 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1 Data size: 593 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col11,0)) (type: bigint), COALESCE(_col12,0) (type: double), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 891 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1413 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 891 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3944,35 +4012,35 @@ STAGE PLANS: outputColumnNames: a, b, c, p1, p2 Statistics: Num rows: 5 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll') keys: p1 (type: string), p2 (type: int) minReductionHashAggr: 0.6 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 2 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 2 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary), _col11 (type: int), _col12 (type: struct), _col13 (type: bigint), _col14 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 2 Data size: 1182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col11,0)) (type: bigint), COALESCE(_col12,0) (type: double), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 2 Data size: 1778 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1778 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4147,19 +4215,19 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 250 Data size: 91500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll') keys: 'yesterday' (type: string), 3 (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1 Data size: 801 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 'yesterday' (type: string), 3 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: 'yesterday' (type: string), 3 (type: int) - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 801 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary), _col11 (type: int), _col12 (type: struct), _col13 (type: bigint), _col14 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20) and enforce_constraint(value is not null)) (type: boolean) Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE @@ -4180,34 +4248,34 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: max(length(i)), avg(COALESCE(length(i),0)), count(1), count(i), compute_bit_vector(i, 'hll'), max(length(j)), avg(COALESCE(length(j),0)), count(j), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.962963 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: 'yesterday' (type: string), 3 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1 Data size: 597 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), 'yesterday' (type: string), 3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col11,0)) (type: bigint), COALESCE(_col12,0) (type: double), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'yesterday' (type: string), 3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 895 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1417 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 895 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4216,17 +4284,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -4488,31 +4560,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -4788,31 +4864,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -5105,31 +5185,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -5370,31 +5454,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -5765,33 +5853,37 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(j), max(j), count(j), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -5870,33 +5962,37 @@ STAGE PLANS: outputColumnNames: i, j Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(j), max(j), count(j), compute_bit_vector(j, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -5990,33 +6086,37 @@ STAGE PLANS: outputColumnNames: i Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -6086,33 +6186,37 @@ STAGE PLANS: outputColumnNames: i Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -6229,35 +6333,35 @@ STAGE PLANS: outputColumnNames: key, key_mm Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') keys: key_mm (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out b/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out index 7756c05c2d..78355ce411 100644 --- a/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out @@ -42,33 +42,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/explain_ddl.q.out b/ql/src/test/results/clientpositive/llap/explain_ddl.q.out index 20a7ee846f..3cada1bbae 100644 --- a/ql/src/test/results/clientpositive/llap/explain_ddl.q.out +++ b/ql/src/test/results/clientpositive/llap/explain_ddl.q.out @@ -89,33 +89,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -189,33 +193,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -293,33 +301,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -393,33 +405,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -580,33 +596,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out b/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out index 4f88488642..343aad17aa 100644 --- a/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out @@ -1213,241 +1213,247 @@ Stage-5 Stage-3 Reducer 5 llap File Output Operator [FS_81] - Group By Operator [GBY_79] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Union 4 [CUSTOM_SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] llap - File Output Operator [FS_233] - table:{"name:":"default.a_n14"} - Select Operator [SEL_231] (rows=193/820 width=175) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_230] (rows=193/820 width=175) - Conds:RS_69._col1=Union 23._col0(Inner),Output:["_col0","_col3"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_69] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_202] (rows=39/115 width=264) - Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_66] + Select Operator [SEL_80] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_79] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Union 4 [CUSTOM_SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] llap + File Output Operator [FS_233] + table:{"name:":"default.a_n14"} + Select Operator [SEL_231] (rows=193/820 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_230] (rows=193/820 width=175) + Conds:RS_69._col1=Union 23._col0(Inner),Output:["_col0","_col3"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_69] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_202] (rows=39/115 width=264) + Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_66] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_120] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 21 [SIMPLE_EDGE] llap + SHUFFLE [RS_67] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_129] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_49] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] llap + Reduce Output Operator [RS_271] PartitionCols:_col0 - Select Operator [SEL_5] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_120] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 21 [SIMPLE_EDGE] llap - SHUFFLE [RS_67] + Select Operator [SEL_269] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_268] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_267] (rows=25/25 width=89) + Output:["value"] + <-Map 24 [CONTAINS] llap + Reduce Output Operator [RS_276] PartitionCols:_col0 - Select Operator [SEL_51] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_129] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_49] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 23 [SIMPLE_EDGE] - <-Map 22 [CONTAINS] llap - Reduce Output Operator [RS_271] - PartitionCols:_col0 - Select Operator [SEL_269] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_268] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_267] (rows=25/25 width=89) - Output:["value"] - <-Map 24 [CONTAINS] llap - Reduce Output Operator [RS_276] - PartitionCols:_col0 - Select Operator [SEL_274] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_273] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_272] (rows=500/500 width=91) - Output:["value"] - <-Map 25 [CONTAINS] llap - Reduce Output Operator [RS_281] - PartitionCols:_col0 - Select Operator [SEL_279] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_278] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_277] (rows=500/500 width=91) - Output:["value"] - <-Map 26 [CONTAINS] llap - Reduce Output Operator [RS_286] - PartitionCols:_col0 - Select Operator [SEL_284] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_283] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_282] (rows=500/500 width=91) - Output:["value"] - Reduce Output Operator [RS_239] - Select Operator [SEL_234] (rows=2899/820 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_231] - File Output Operator [FS_235] - table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_231] - Reduce Output Operator [RS_240] - Select Operator [SEL_236] (rows=2899/820 width=178) - Output:["key","value"] + Select Operator [SEL_274] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_273] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_272] (rows=500/500 width=91) + Output:["value"] + <-Map 25 [CONTAINS] llap + Reduce Output Operator [RS_281] + PartitionCols:_col0 + Select Operator [SEL_279] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_278] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_277] (rows=500/500 width=91) + Output:["value"] + <-Map 26 [CONTAINS] llap + Reduce Output Operator [RS_286] + PartitionCols:_col0 + Select Operator [SEL_284] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_283] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_282] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_239] + Select Operator [SEL_234] (rows=2899/820 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_231] + File Output Operator [FS_235] + table:{"name:":"default.b_n10"} Please refer to the previous Select Operator [SEL_231] - File Output Operator [FS_237] - table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_231] - Reduce Output Operator [RS_241] - Select Operator [SEL_238] (rows=2899/820 width=178) - Output:["key","value"] + Reduce Output Operator [RS_240] + Select Operator [SEL_236] (rows=2899/820 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_231] + File Output Operator [FS_237] + table:{"name:":"default.c_n3"} Please refer to the previous Select Operator [SEL_231] - <-Reducer 3 [CONTAINS] llap - File Output Operator [FS_209] - table:{"name:":"default.a_n14"} - Select Operator [SEL_207] (rows=66/170 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_206] (rows=66/170 width=177) - Conds:RS_17._col3=Union 14._col0(Inner),Output:["_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_200] (rows=39/37 width=266) - Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] + Reduce Output Operator [RS_241] + Select Operator [SEL_238] (rows=2899/820 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_231] + <-Reducer 3 [CONTAINS] llap + File Output Operator [FS_209] + table:{"name:":"default.a_n14"} + Select Operator [SEL_207] (rows=66/170 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_206] (rows=66/170 width=177) + Conds:RS_17._col3=Union 14._col0(Inner),Output:["_col1","_col2"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_200] (rows=39/37 width=266) + Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_119] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_0] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_5] + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] llap + Reduce Output Operator [RS_246] PartitionCols:_col0 - Select Operator [SEL_2] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_119] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_0] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + Select Operator [SEL_244] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_243] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_242] (rows=25/25 width=89) + Output:["value"] + <-Map 15 [CONTAINS] llap + Reduce Output Operator [RS_251] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_5] - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] llap - Reduce Output Operator [RS_246] - PartitionCols:_col0 - Select Operator [SEL_244] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_243] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_242] (rows=25/25 width=89) - Output:["value"] - <-Map 15 [CONTAINS] llap - Reduce Output Operator [RS_251] - PartitionCols:_col0 - Select Operator [SEL_249] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_248] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_247] (rows=500/500 width=91) - Output:["value"] - Reduce Output Operator [RS_215] - Select Operator [SEL_210] (rows=2899/170 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_207] - File Output Operator [FS_211] - table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_207] - Reduce Output Operator [RS_216] - Select Operator [SEL_212] (rows=2899/170 width=178) - Output:["key","value"] + Select Operator [SEL_249] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_248] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_247] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_215] + Select Operator [SEL_210] (rows=2899/170 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_207] + File Output Operator [FS_211] + table:{"name:":"default.b_n10"} Please refer to the previous Select Operator [SEL_207] - File Output Operator [FS_213] - table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_207] - Reduce Output Operator [RS_217] - Select Operator [SEL_214] (rows=2899/170 width=178) - Output:["key","value"] + Reduce Output Operator [RS_216] + Select Operator [SEL_212] (rows=2899/170 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_207] + File Output Operator [FS_213] + table:{"name:":"default.c_n3"} Please refer to the previous Select Operator [SEL_207] - <-Reducer 9 [CONTAINS] llap - File Output Operator [FS_221] - table:{"name:":"default.a_n14"} - Select Operator [SEL_219] (rows=2640/5421 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_218] (rows=2640/5421 width=178) - Conds:RS_41._col1=Union 18._col0(Inner),Output:["_col0","_col3"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_201] (rows=791/1028 width=269) - Conds:RS_38._col0=RS_39._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_38] + Reduce Output Operator [RS_217] + Select Operator [SEL_214] (rows=2899/170 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_207] + <-Reducer 9 [CONTAINS] llap + File Output Operator [FS_221] + table:{"name:":"default.a_n14"} + Select Operator [SEL_219] (rows=2640/5421 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_218] (rows=2640/5421 width=178) + Conds:RS_41._col1=Union 18._col0(Inner),Output:["_col0","_col3"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_201] (rows=791/1028 width=269) + Conds:RS_38._col0=RS_39._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_38] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_123] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + Please refer to the previous TableScan [TS_0] + <-Map 16 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_124] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_24] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 18 [SIMPLE_EDGE] + <-Map 17 [CONTAINS] llap + Reduce Output Operator [RS_256] PartitionCols:_col0 - Select Operator [SEL_23] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_123] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_0] - <-Map 16 [SIMPLE_EDGE] llap - SHUFFLE [RS_39] + Select Operator [SEL_254] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_253] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_252] (rows=25/25 width=89) + Output:["value"] + <-Map 19 [CONTAINS] llap + Reduce Output Operator [RS_261] PartitionCols:_col0 - Select Operator [SEL_26] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_24] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 18 [SIMPLE_EDGE] - <-Map 17 [CONTAINS] llap - Reduce Output Operator [RS_256] - PartitionCols:_col0 - Select Operator [SEL_254] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_253] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_252] (rows=25/25 width=89) - Output:["value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_261] - PartitionCols:_col0 - Select Operator [SEL_259] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_258] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_257] (rows=500/500 width=91) - Output:["value"] - <-Map 20 [CONTAINS] llap - Reduce Output Operator [RS_266] - PartitionCols:_col0 - Select Operator [SEL_264] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_263] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_262] (rows=500/500 width=91) - Output:["value"] - Reduce Output Operator [RS_227] - Select Operator [SEL_222] (rows=2899/5421 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_219] - File Output Operator [FS_223] - table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_219] - Reduce Output Operator [RS_228] - Select Operator [SEL_224] (rows=2899/5421 width=178) - Output:["key","value"] + Select Operator [SEL_259] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_258] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_257] (rows=500/500 width=91) + Output:["value"] + <-Map 20 [CONTAINS] llap + Reduce Output Operator [RS_266] + PartitionCols:_col0 + Select Operator [SEL_264] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_263] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_262] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_227] + Select Operator [SEL_222] (rows=2899/5421 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_219] + File Output Operator [FS_223] + table:{"name:":"default.b_n10"} Please refer to the previous Select Operator [SEL_219] - File Output Operator [FS_225] - table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_219] - Reduce Output Operator [RS_229] - Select Operator [SEL_226] (rows=2899/5421 width=178) - Output:["key","value"] + Reduce Output Operator [RS_228] + Select Operator [SEL_224] (rows=2899/5421 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_219] + File Output Operator [FS_225] + table:{"name:":"default.c_n3"} Please refer to the previous Select Operator [SEL_219] + Reduce Output Operator [RS_229] + Select Operator [SEL_226] (rows=2899/5421 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_219] Reducer 6 llap File Output Operator [FS_89] - Group By Operator [GBY_87] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] + Select Operator [SEL_88] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_87] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] Reducer 7 llap File Output Operator [FS_97] - Group By Operator [GBY_95] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] + Select Operator [SEL_96] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_95] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] Stage-6 Stats Work{} Stage-1 @@ -1583,252 +1589,258 @@ Stage-5 Stage-3 Reducer 10 llap File Output Operator [FS_137] - Group By Operator [GBY_135] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_134] - Select Operator [SEL_133] (rows=2899/319 width=178) - Output:["key","value"] - Group By Operator [GBY_112] (rows=2899/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 15 [CONTAINS] llap - Reduce Output Operator [RS_260] - PartitionCols:_col0, _col1 - Select Operator [SEL_258] (rows=193/304 width=175) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_257] (rows=193/304 width=175) - Conds:RS_104._col1=RS_105._col0(Inner),Output:["_col0","_col3"] - <-Reducer 14 [SIMPLE_EDGE] llap - SHUFFLE [RS_104] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_242] (rows=39/115 width=264) - Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 13 [SIMPLE_EDGE] llap - SHUFFLE [RS_101] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 28 [SIMPLE_EDGE] llap - SHUFFLE [RS_102] - PartitionCols:_col0 - Select Operator [SEL_70] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_169] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_68] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 35 [SIMPLE_EDGE] llap - SHUFFLE [RS_105] - PartitionCols:_col0 - Select Operator [SEL_100] (rows=1525/319 width=91) - Output:["_col0"] - Group By Operator [GBY_99] (rows=1525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 34 [SIMPLE_EDGE] - <-Map 38 [CONTAINS] llap - Reduce Output Operator [RS_317] - PartitionCols:_col1, _col0 - Select Operator [SEL_315] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_314] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_313] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 33 [CONTAINS] llap - Reduce Output Operator [RS_302] - PartitionCols:_col1, _col0 - Select Operator [SEL_300] (rows=1025/319 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_299] (rows=1025/319 width=178) + Select Operator [SEL_136] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_135] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_134] + Select Operator [SEL_133] (rows=2899/319 width=178) + Output:["key","value"] + Group By Operator [GBY_112] (rows=2899/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 6 [SIMPLE_EDGE] + <-Reducer 15 [CONTAINS] llap + Reduce Output Operator [RS_260] + PartitionCols:_col0, _col1 + Select Operator [SEL_258] (rows=193/304 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_257] (rows=193/304 width=175) + Conds:RS_104._col1=RS_105._col0(Inner),Output:["_col0","_col3"] + <-Reducer 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_104] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_242] (rows=39/115 width=264) + Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_160] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 28 [SIMPLE_EDGE] llap + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_169] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_68] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 35 [SIMPLE_EDGE] llap + SHUFFLE [RS_105] + PartitionCols:_col0 + Select Operator [SEL_100] (rows=1525/319 width=91) + Output:["_col0"] + Group By Operator [GBY_99] (rows=1525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 34 [SIMPLE_EDGE] + <-Map 38 [CONTAINS] llap + Reduce Output Operator [RS_317] + PartitionCols:_col1, _col0 + Select Operator [SEL_315] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_314] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_313] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 33 [CONTAINS] llap + Reduce Output Operator [RS_302] + PartitionCols:_col1, _col0 + Select Operator [SEL_300] (rows=1025/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_299] (rows=1025/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 32 [SIMPLE_EDGE] + <-Map 37 [CONTAINS] llap + Reduce Output Operator [RS_312] + PartitionCols:_col1, _col0 + Select Operator [SEL_310] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_309] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_308] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 31 [CONTAINS] llap + Reduce Output Operator [RS_298] + PartitionCols:_col1, _col0 + Select Operator [SEL_296] (rows=525/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_295] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 30 [SIMPLE_EDGE] + <-Map 29 [CONTAINS] llap + Reduce Output Operator [RS_294] + PartitionCols:_col1, _col0 + Select Operator [SEL_292] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_291] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_290] (rows=25/25 width=175) + Output:["key","value"] + <-Map 36 [CONTAINS] llap + Reduce Output Operator [RS_307] + PartitionCols:_col1, _col0 + Select Operator [SEL_305] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_304] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_303] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 5 [CONTAINS] llap + Reduce Output Operator [RS_252] + PartitionCols:_col0, _col1 + Group By Operator [GBY_250] (rows=2706/309 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 4 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] llap + Reduce Output Operator [RS_256] + PartitionCols:_col0, _col1 + Select Operator [SEL_254] (rows=2640/1056 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_253] (rows=2640/1056 width=178) + Conds:RS_55._col1=RS_56._col0(Inner),Output:["_col0","_col3"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_55] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_241] (rows=791/1028 width=269) + Conds:RS_52._col0=RS_53._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_52] + PartitionCols:_col0 + Select Operator [SEL_27] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_163] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 20 [SIMPLE_EDGE] llap + SHUFFLE [RS_53] + PartitionCols:_col0 + Select Operator [SEL_30] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_164] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_28] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 25 [SIMPLE_EDGE] llap + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=1025/319 width=91) + Output:["_col0"] + Group By Operator [GBY_50] (rows=1025/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 32 [SIMPLE_EDGE] - <-Map 37 [CONTAINS] llap - Reduce Output Operator [RS_312] + <-Union 24 [SIMPLE_EDGE] + <-Map 27 [CONTAINS] llap + Reduce Output Operator [RS_289] PartitionCols:_col1, _col0 - Select Operator [SEL_310] (rows=500/500 width=178) + Select Operator [SEL_287] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_309] (rows=500/500 width=178) + Filter Operator [FIL_286] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_308] (rows=500/500 width=178) + TableScan [TS_285] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 31 [CONTAINS] llap - Reduce Output Operator [RS_298] + <-Reducer 23 [CONTAINS] llap + Reduce Output Operator [RS_279] PartitionCols:_col1, _col0 - Select Operator [SEL_296] (rows=525/319 width=178) + Select Operator [SEL_277] (rows=525/319 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_295] (rows=525/319 width=178) + Group By Operator [GBY_276] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 30 [SIMPLE_EDGE] - <-Map 29 [CONTAINS] llap - Reduce Output Operator [RS_294] + <-Union 22 [SIMPLE_EDGE] + <-Map 21 [CONTAINS] llap + Reduce Output Operator [RS_275] PartitionCols:_col1, _col0 - Select Operator [SEL_292] (rows=25/25 width=175) + Select Operator [SEL_273] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_291] (rows=25/25 width=175) + Filter Operator [FIL_272] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_290] (rows=25/25 width=175) + TableScan [TS_271] (rows=25/25 width=175) Output:["key","value"] - <-Map 36 [CONTAINS] llap - Reduce Output Operator [RS_307] + <-Map 26 [CONTAINS] llap + Reduce Output Operator [RS_284] PartitionCols:_col1, _col0 - Select Operator [SEL_305] (rows=500/500 width=178) + Select Operator [SEL_282] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_304] (rows=500/500 width=178) + Filter Operator [FIL_281] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_303] (rows=500/500 width=178) + TableScan [TS_280] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 5 [CONTAINS] llap - Reduce Output Operator [RS_252] - PartitionCols:_col0, _col1 - Group By Operator [GBY_250] (rows=2706/309 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] llap - Reduce Output Operator [RS_256] - PartitionCols:_col0, _col1 - Select Operator [SEL_254] (rows=2640/1056 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_253] (rows=2640/1056 width=178) - Conds:RS_55._col1=RS_56._col0(Inner),Output:["_col0","_col3"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_55] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_241] (rows=791/1028 width=269) - Conds:RS_52._col0=RS_53._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_52] - PartitionCols:_col0 - Select Operator [SEL_27] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_163] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 20 [SIMPLE_EDGE] llap - SHUFFLE [RS_53] - PartitionCols:_col0 - Select Operator [SEL_30] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_28] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 25 [SIMPLE_EDGE] llap - SHUFFLE [RS_56] - PartitionCols:_col0 - Select Operator [SEL_51] (rows=1025/319 width=91) - Output:["_col0"] - Group By Operator [GBY_50] (rows=1025/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 24 [SIMPLE_EDGE] - <-Map 27 [CONTAINS] llap - Reduce Output Operator [RS_289] - PartitionCols:_col1, _col0 - Select Operator [SEL_287] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_286] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_285] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 23 [CONTAINS] llap - Reduce Output Operator [RS_279] - PartitionCols:_col1, _col0 - Select Operator [SEL_277] (rows=525/319 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_276] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 22 [SIMPLE_EDGE] - <-Map 21 [CONTAINS] llap - Reduce Output Operator [RS_275] - PartitionCols:_col1, _col0 - Select Operator [SEL_273] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_272] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_271] (rows=25/25 width=175) - Output:["key","value"] - <-Map 26 [CONTAINS] llap - Reduce Output Operator [RS_284] - PartitionCols:_col1, _col0 - Select Operator [SEL_282] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_281] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_280] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 3 [CONTAINS] llap - Reduce Output Operator [RS_249] - PartitionCols:_col0, _col1 - Select Operator [SEL_247] (rows=66/61 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_246] (rows=66/61 width=177) - Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col2"] - <-Reducer 18 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_17] (rows=525/319 width=91) - Output:["_col0"] - Group By Operator [GBY_16] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 17 [SIMPLE_EDGE] - <-Map 16 [CONTAINS] llap - Reduce Output Operator [RS_265] - PartitionCols:_col1, _col0 - Select Operator [SEL_263] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_262] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_261] (rows=25/25 width=175) - Output:["key","value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_270] - PartitionCols:_col1, _col0 - Select Operator [SEL_268] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_267] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_266] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_240] (rows=39/37 width=266) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=500/500 width=178) - predicate:key is not null - Please refer to the previous TableScan [TS_0] - <-Map 13 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_5] + <-Reducer 3 [CONTAINS] llap + Reduce Output Operator [RS_249] + PartitionCols:_col0, _col1 + Select Operator [SEL_247] (rows=66/61 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_246] (rows=66/61 width=177) + Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col2"] + <-Reducer 18 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=525/319 width=91) + Output:["_col0"] + Group By Operator [GBY_16] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 17 [SIMPLE_EDGE] + <-Map 16 [CONTAINS] llap + Reduce Output Operator [RS_265] + PartitionCols:_col1, _col0 + Select Operator [SEL_263] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_262] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_261] (rows=25/25 width=175) + Output:["key","value"] + <-Map 19 [CONTAINS] llap + Reduce Output Operator [RS_270] + PartitionCols:_col1, _col0 + Select Operator [SEL_268] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_267] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_266] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_240] (rows=39/37 width=266) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_159] (rows=500/500 width=178) + predicate:key is not null + Please refer to the previous TableScan [TS_0] + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_19] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_5] Reducer 8 llap File Output Operator [FS_121] - Group By Operator [GBY_119] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_118] - Select Operator [SEL_117] (rows=2899/319 width=178) - Output:["key","value"] - Please refer to the previous Group By Operator [GBY_112] + Select Operator [SEL_120] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_119] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_118] + Select Operator [SEL_117] (rows=2899/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] Reducer 9 llap File Output Operator [FS_129] - Group By Operator [GBY_127] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_126] - Select Operator [SEL_125] (rows=2899/319 width=178) - Output:["key","value"] - Please refer to the previous Group By Operator [GBY_112] + Select Operator [SEL_128] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_127] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_126] + Select Operator [SEL_125] (rows=2899/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] Stage-6 Stats Work{} Stage-1 @@ -1916,57 +1928,61 @@ Stage-4 Stage-2 Reducer 6 llap File Output Operator [FS_24] - Group By Operator [GBY_22] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_18] - table:{"name:":"default.dest1_n105"} - Select Operator [SEL_16] (rows=316/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=316/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_11] (rows=501/310 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_44] - PartitionCols:_col0, _col1 - Select Operator [SEL_42] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_41] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_40] - PartitionCols:_col0, _col1 - Select Operator [SEL_38] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count()"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - PARTITION_ONLY_SHUFFLE [RS_21] - Select Operator [SEL_20] (rows=316/310 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_16] + Select Operator [SEL_23] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_22] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_18] + table:{"name:":"default.dest1_n105"} + Select Operator [SEL_16] (rows=316/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_15] (rows=316/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=501/310 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_44] + PartitionCols:_col0, _col1 + Select Operator [SEL_42] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_41] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_40] + PartitionCols:_col0, _col1 + Select Operator [SEL_38] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_37] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + PARTITION_ONLY_SHUFFLE [RS_21] + Select Operator [SEL_20] (rows=316/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] Reducer 7 llap File Output Operator [FS_36] - Group By Operator [GBY_34] (rows=1/1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_33] - Select Operator [SEL_32] (rows=501/310 width=456) - Output:["key","val1","val2"] - Select Operator [SEL_28] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_27] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_11] + Select Operator [SEL_35] (rows=1/1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_34] (rows=1/1 width=500) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')","max(VALUE._col8)","avg(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col10, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_33] + Select Operator [SEL_32] (rows=501/310 width=456) + Output:["key","val1","val2"] + Select Operator [SEL_28] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_27] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_11] Stage-5 Stats Work{} Stage-1 @@ -2128,71 +2144,75 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_22] - Group By Operator [GBY_20] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_16] - table:{"name:":"default.dest1_n105"} - Select Operator [SEL_14] (rows=316/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_13] (rows=316/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_43] - PartitionCols:_col0 - Select Operator [SEL_41] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_40] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_44] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_41] - <-Map 9 [CONTAINS] llap - Reduce Output Operator [RS_48] - PartitionCols:_col0 - Select Operator [SEL_46] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_45] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_49] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_46] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_38] - PartitionCols:_col0 - Select Operator [SEL_36] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_35] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count()"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_39] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_36] - PARTITION_ONLY_SHUFFLE [RS_19] - Select Operator [SEL_18] (rows=316/310 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_14] + Select Operator [SEL_21] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_20] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_16] + table:{"name:":"default.dest1_n105"} + Select Operator [SEL_14] (rows=316/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_13] (rows=316/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_43] + PartitionCols:_col0 + Select Operator [SEL_41] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_40] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_44] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_41] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_48] + PartitionCols:_col0 + Select Operator [SEL_46] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_45] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_49] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_46] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_36] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_35] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_39] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_36] + PARTITION_ONLY_SHUFFLE [RS_19] + Select Operator [SEL_18] (rows=316/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_14] Reducer 7 llap File Output Operator [FS_34] - Group By Operator [GBY_32] (rows=1/1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_28] - table:{"name:":"default.dest2_n29"} - Select Operator [SEL_26] (rows=1001/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_25] (rows=1001/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_31] - Select Operator [SEL_30] (rows=1001/310 width=456) - Output:["key","val1","val2"] - Please refer to the previous Select Operator [SEL_26] + Select Operator [SEL_33] (rows=1/1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_32] (rows=1/1 width=500) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')","max(VALUE._col8)","avg(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col10, 'hll')"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_28] + table:{"name:":"default.dest2_n29"} + Select Operator [SEL_26] (rows=1001/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_25] (rows=1001/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_31] + Select Operator [SEL_30] (rows=1001/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_26] Stage-5 Stats Work{} Stage-1 @@ -2270,61 +2290,65 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_20] - Group By Operator [GBY_18] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_14] - table:{"name:":"default.dest1_n105"} - Select Operator [SEL_12] (rows=316/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_11] (rows=316/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_41] - PartitionCols:_col0 - Select Operator [SEL_39] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_38] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_42] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_39] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_36] - PartitionCols:_col0 - Select Operator [SEL_34] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_33] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count()"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_37] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_34] - PARTITION_ONLY_SHUFFLE [RS_17] - Select Operator [SEL_16] (rows=316/310 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_12] + Select Operator [SEL_19] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_18] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_14] + table:{"name:":"default.dest1_n105"} + Select Operator [SEL_12] (rows=316/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_11] (rows=316/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_41] + PartitionCols:_col0 + Select Operator [SEL_39] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_38] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_42] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_39] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_33] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_37] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_34] + PARTITION_ONLY_SHUFFLE [RS_17] + Select Operator [SEL_16] (rows=316/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_12] Reducer 7 llap File Output Operator [FS_32] - Group By Operator [GBY_30] (rows=1/1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_26] - table:{"name:":"default.dest2_n29"} - Select Operator [SEL_24] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_23] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - Select Operator [SEL_28] (rows=501/310 width=456) - Output:["key","val1","val2"] - Please refer to the previous Select Operator [SEL_24] + Select Operator [SEL_31] (rows=1/1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_30] (rows=1/1 width=500) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')","max(VALUE._col8)","avg(VALUE._col9)","count(VALUE._col10)","compute_bit_vector(VALUE._col10, 'hll')"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_26] + table:{"name:":"default.dest2_n29"} + Select Operator [SEL_24] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_23] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_29] + Select Operator [SEL_28] (rows=501/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_24] Stage-5 Stats Work{} Stage-1 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index f10356ab47..a2f241c470 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -64,10 +64,10 @@ Stage-3 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=1 width=1077) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_8] (rows=1 width=1077) - Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1 + Select Operator [SEL_9] (rows=1 width=727) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Group By Operator [GBY_8] (rows=1 width=529) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap File Output Operator [FS_3] table:{"name:":"default.src_orc_merge_test_part_n1"} @@ -77,8 +77,8 @@ Stage-3 default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=1 width=1061) - Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"],keys:ds, ts + Group By Operator [GBY_6] (rows=1 width=597) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["min(key)","max(key)","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"],keys:ds, ts Select Operator [SEL_5] (rows=500 width=292) Output:["key","value","ds","ts"] Please refer to the previous Select Operator [SEL_1] @@ -117,10 +117,10 @@ Stage-3 Stage-1 Reducer 3 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=1 width=1077) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_12] (rows=1 width=1077) - Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1 + Select Operator [SEL_13] (rows=1 width=727) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Group By Operator [GBY_12] (rows=1 width=529) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap File Output Operator [FS_7] table:{"name:":"default.src_orc_merge_test_part_n1"} @@ -140,8 +140,8 @@ Stage-3 default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] SHUFFLE [RS_11] PartitionCols:_col0, _col1 - Group By Operator [GBY_10] (rows=1 width=1061) - Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"],keys:ds, ts + Group By Operator [GBY_10] (rows=1 width=597) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["min(key)","max(key)","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"],keys:ds, ts Select Operator [SEL_9] (rows=100 width=292) Output:["key","value","ds","ts"] Please refer to the previous Select Operator [SEL_6] @@ -3735,35 +3735,37 @@ Stage-3 Stage-1 Reducer 4 llap File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_8] - table:{"name:":"default.nzhang_CTAS1_n1"} - Limit [LIM_7] (rows=10 width=178) - Number of rows:10 - Select Operator [SEL_6] (rows=10 width=178) - Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_5] - Top N Key Operator [TNK_18] (rows=10 width=178) - keys:_col0, _col1,top n:10 - Limit [LIM_4] (rows=10 width=178) - Number of rows:10 - Select Operator [SEL_3] (rows=500 width=178) - Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] - Top N Key Operator [TNK_19] (rows=500 width=178) - keys:key, value,top n:10 - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_14] - Select Operator [SEL_13] (rows=10 width=178) - Output:["col1","col2"] - Please refer to the previous Limit [LIM_7] + Select Operator [SEL_16] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_15] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_8] + table:{"name:":"default.nzhang_CTAS1_n1"} + Limit [LIM_7] (rows=10 width=178) + Number of rows:10 + Select Operator [SEL_6] (rows=10 width=178) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_5] + Top N Key Operator [TNK_18] (rows=10 width=178) + keys:_col0, _col1,top n:10 + Limit [LIM_4] (rows=10 width=178) + Number of rows:10 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_19] (rows=500 width=178) + keys:key, value,top n:10 + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_14] + Select Operator [SEL_13] (rows=10 width=178) + Output:["col1","col2"] + Please refer to the previous Limit [LIM_7] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -3806,35 +3808,37 @@ Stage-3 Stage-1 Reducer 4 llap File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_8] - table:{"name:":"default.nzhang_ctas3_n1"} - Limit [LIM_7] (rows=10 width=192) - Number of rows:10 - Select Operator [SEL_6] (rows=10 width=192) - Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_5] - Top N Key Operator [TNK_18] (rows=10 width=192) - keys:_col0, _col1,top n:10 - Limit [LIM_4] (rows=10 width=192) - Number of rows:10 - Select Operator [SEL_3] (rows=500 width=192) - Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500 width=192) - Output:["_col0","_col1"] - Top N Key Operator [TNK_19] (rows=500 width=178) - keys:(key / 2), concat(value, '_con'),top n:10 - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_14] - Select Operator [SEL_13] (rows=10 width=192) - Output:["col1","col2"] - Please refer to the previous Limit [LIM_7] + Select Operator [SEL_16] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_15] (rows=1 width=340) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(VALUE._col0)","max(VALUE._col0)","count(1)","count(VALUE._col0)","compute_bit_vector(VALUE._col0, 'hll')","max(VALUE._col3)","avg(VALUE._col4)","count(VALUE._col5)","compute_bit_vector(VALUE._col5, 'hll')"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_8] + table:{"name:":"default.nzhang_ctas3_n1"} + Limit [LIM_7] (rows=10 width=192) + Number of rows:10 + Select Operator [SEL_6] (rows=10 width=192) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_5] + Top N Key Operator [TNK_18] (rows=10 width=192) + keys:_col0, _col1,top n:10 + Limit [LIM_4] (rows=10 width=192) + Number of rows:10 + Select Operator [SEL_3] (rows=500 width=192) + Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500 width=192) + Output:["_col0","_col1"] + Top N Key Operator [TNK_19] (rows=500 width=178) + keys:(key / 2), concat(value, '_con'),top n:10 + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_14] + Select Operator [SEL_13] (rows=10 width=192) + Output:["col1","col2"] + Please refer to the previous Limit [LIM_7] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -5571,73 +5575,77 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=2640) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_14] - Group By Operator [GBY_13] (rows=1 width=2576) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')","compute_stats(VALUE._col6, 'hll')"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_9] - table:{"name:":"default.part_4_n1"} - Select Operator [SEL_7] (rows=26 width=239) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_6] (rows=26 width=499) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}] - Select Operator [SEL_5] (rows=26 width=499) - Output:["_col1","_col2","_col5","_col7"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col2 - PTF Operator [PTF_3] (rows=26 width=499) - Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}}] - Select Operator [SEL_2] (rows=26 width=499) - Output:["_col1","_col2","_col5","_col7"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_1] - PartitionCols:p_mfgr - TableScan [TS_0] (rows=26 width=231) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"] - PARTITION_ONLY_SHUFFLE [RS_12] - PartitionCols:rand() - Select Operator [SEL_11] (rows=26 width=239) - Output:["p_mfgr","p_name","p_size","r","dr","s"] - Please refer to the previous Select Operator [SEL_7] + Select Operator [SEL_16] (rows=1 width=1590) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35"] + Group By Operator [GBY_15] (rows=1 width=984) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector(VALUE._col12)","min(VALUE._col13)","max(VALUE._col14)","count(VALUE._col15)","compute_bit_vector(VALUE._col16)","min(VALUE._col17)","max(VALUE._col18)","count(VALUE._col19)","compute_bit_vector(VALUE._col20)","min(VALUE._col21)","max(VALUE._col22)","count(VALUE._col23)","compute_bit_vector(VALUE._col24)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_14] + Group By Operator [GBY_13] (rows=1 width=1120) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')","min(VALUE._col8)","max(VALUE._col8)","count(VALUE._col8)","compute_bit_vector(VALUE._col8, 'hll')","min(VALUE._col9)","max(VALUE._col9)","count(VALUE._col9)","compute_bit_vector(VALUE._col9, 'hll')","min(VALUE._col10)","max(VALUE._col10)","count(VALUE._col10)","compute_bit_vector(VALUE._col10, 'hll')","min(VALUE._col11)","max(VALUE._col11)","count(VALUE._col11)","compute_bit_vector(VALUE._col11, 'hll')"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_9] + table:{"name:":"default.part_4_n1"} + Select Operator [SEL_7] (rows=26 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_6] (rows=26 width=499) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}] + Select Operator [SEL_5] (rows=26 width=499) + Output:["_col1","_col2","_col5","_col7"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col2 + PTF Operator [PTF_3] (rows=26 width=499) + Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}}] + Select Operator [SEL_2] (rows=26 width=499) + Output:["_col1","_col2","_col5","_col7"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_1] + PartitionCols:p_mfgr + TableScan [TS_0] (rows=26 width=231) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"] + PARTITION_ONLY_SHUFFLE [RS_12] + PartitionCols:rand() + Select Operator [SEL_11] (rows=26 width=239) + Output:["p_mfgr","p_name","p_size","r","dr","s"] + Please refer to the previous Select Operator [SEL_7] Reducer 9 llap File Output Operator [FS_36] - Group By Operator [GBY_34] (rows=1 width=3520) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)","compute_stats(VALUE._col6)","compute_stats(VALUE._col7)"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_33] - Group By Operator [GBY_32] (rows=1 width=3424) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')","compute_stats(VALUE._col6, 'hll')","compute_stats(VALUE._col7, 'hll')","compute_stats(VALUE._col8, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_28] - table:{"name:":"default.part_5_n1"} - Select Operator [SEL_25] (rows=26 width=247) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - PTF Operator [PTF_24] (rows=26 width=499) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col2 ASC NULLS LAST","partition by:":"_col3"}] - Select Operator [SEL_23] (rows=26 width=499) - Output:["_col0","_col2","_col3","_col6"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - PartitionCols:_col2 - Select Operator [SEL_21] (rows=26 width=491) - Output:["sum_window_0","_col1","_col2","_col5"] - PTF Operator [PTF_20] (rows=26 width=491) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS LAST","partition by:":"_col2"}] - Select Operator [SEL_19] (rows=26 width=491) - Output:["_col1","_col2","_col5"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col2 - Please refer to the previous PTF Operator [PTF_3] - PARTITION_ONLY_SHUFFLE [RS_31] - PartitionCols:rand() - Select Operator [SEL_30] (rows=26 width=247) - Output:["p_mfgr","p_name","p_size","s2","r","dr","cud","fv1"] - Please refer to the previous Select Operator [SEL_25] + Select Operator [SEL_35] (rows=1 width=2118) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47"] + Group By Operator [GBY_34] (rows=1 width=1304) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector(VALUE._col12)","min(VALUE._col13)","max(VALUE._col14)","count(VALUE._col15)","compute_bit_vector(VALUE._col16)","min(VALUE._col17)","max(VALUE._col18)","count(VALUE._col19)","compute_bit_vector(VALUE._col20)","min(VALUE._col21)","max(VALUE._col22)","count(VALUE._col23)","compute_bit_vector(VALUE._col24)","min(VALUE._col25)","max(VALUE._col26)","count(VALUE._col27)","compute_bit_vector(VALUE._col28)","min(VALUE._col29)","max(VALUE._col30)","count(VALUE._col31)","compute_bit_vector(VALUE._col32)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_33] + Group By Operator [GBY_32] (rows=1 width=1440) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')","min(VALUE._col8)","max(VALUE._col8)","count(VALUE._col8)","compute_bit_vector(VALUE._col8, 'hll')","min(VALUE._col9)","max(VALUE._col9)","count(VALUE._col9)","compute_bit_vector(VALUE._col9, 'hll')","min(VALUE._col10)","max(VALUE._col10)","count(VALUE._col10)","compute_bit_vector(VALUE._col10, 'hll')","min(VALUE._col11)","max(VALUE._col11)","count(VALUE._col11)","compute_bit_vector(VALUE._col11, 'hll')","min(VALUE._col12)","max(VALUE._col12)","count(VALUE._col12)","compute_bit_vector(VALUE._col12, 'hll')","min(VALUE._col13)","max(VALUE._col13)","count(VALUE._col13)","compute_bit_vector(VALUE._col13, 'hll')"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_28] + table:{"name:":"default.part_5_n1"} + Select Operator [SEL_25] (rows=26 width=247) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + PTF Operator [PTF_24] (rows=26 width=499) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col2 ASC NULLS LAST","partition by:":"_col3"}] + Select Operator [SEL_23] (rows=26 width=499) + Output:["_col0","_col2","_col3","_col6"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col2 + Select Operator [SEL_21] (rows=26 width=491) + Output:["sum_window_0","_col1","_col2","_col5"] + PTF Operator [PTF_20] (rows=26 width=491) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS LAST","partition by:":"_col2"}] + Select Operator [SEL_19] (rows=26 width=491) + Output:["_col1","_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col2 + Please refer to the previous PTF Operator [PTF_3] + PARTITION_ONLY_SHUFFLE [RS_31] + PartitionCols:rand() + Select Operator [SEL_30] (rows=26 width=247) + Output:["p_mfgr","p_name","p_size","s2","r","dr","cud","fv1"] + Please refer to the previous Select Operator [SEL_25] Stage-5 Stats Work{} Stage-1 @@ -6039,42 +6047,44 @@ Stage-3 Stage-1 Reducer 4 llap File Output Operator [FS_19] - Group By Operator [GBY_17] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_16] - Group By Operator [GBY_15] (rows=1 width=864) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_11] - table:{"name:":"default.dest_j1_n16"} - Select Operator [SEL_9] (rows=791 width=95) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_34] (rows=791 width=178) - Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500 width=87) - Output:["_col0"] - Filter Operator [FIL_22] (rows=500 width=87) - predicate:key is not null - TableScan [TS_0] (rows=500 width=87) - default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=500 width=178) - predicate:key is not null - TableScan [TS_3] (rows=500 width=178) - default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_14] - PartitionCols:rand() - Select Operator [SEL_13] (rows=791 width=95) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_9] + Select Operator [SEL_18] (rows=1 width=530) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_17] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_16] + Group By Operator [GBY_15] (rows=1 width=400) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(VALUE._col0)","max(VALUE._col0)","count(1)","count(VALUE._col0)","compute_bit_vector(VALUE._col0, 'hll')","max(VALUE._col3)","avg(VALUE._col4)","count(VALUE._col5)","compute_bit_vector(VALUE._col5, 'hll')"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_11] + table:{"name:":"default.dest_j1_n16"} + Select Operator [SEL_9] (rows=791 width=95) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_34] (rows=791 width=178) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500 width=87) + Output:["_col0"] + Filter Operator [FIL_22] (rows=500 width=87) + predicate:key is not null + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_14] + PartitionCols:rand() + Select Operator [SEL_13] (rows=791 width=95) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_9] PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1_n16 select src1.key, src2.value diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index f7ef019f5b..af31e07311 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -2767,359 +2767,365 @@ Stage-5 Stage-3 Reducer 4 llap File Output Operator [FS_82] - Group By Operator [GBY_80] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Union 3 [CUSTOM_SIMPLE_EDGE] - <-Map 18 [CONTAINS] llap - File Output Operator [FS_286] - table:{"name:":"default.a_n19"} - Select Operator [SEL_283] (rows=1844 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_281] (rows=1844 width=10) - Conds:MAPJOIN_280._col1=RS_396._col0(Inner),Output:["_col1","_col4"] - <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_396] - PartitionCols:_col0 - Select Operator [SEL_395] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_394] (rows=25 width=7) - predicate:key is not null - TableScan [TS_63] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_280] (rows=1677 width=10) - Conds:SEL_282._col0=RS_367._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_367] - PartitionCols:_col1 - Select Operator [SEL_365] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_364] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_282] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_279] (rows=25 width=7) - predicate:value is not null - TableScan [TS_276] (rows=25 width=7) - Output:["value"] - Reduce Output Operator [RS_295] - Group By Operator [GBY_292] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_287] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_283] - File Output Operator [FS_288] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_283] - Reduce Output Operator [RS_296] - Group By Operator [GBY_293] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_289] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_283] - File Output Operator [FS_290] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_283] - Reduce Output Operator [RS_297] - Group By Operator [GBY_294] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_291] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_283] - <-Map 19 [CONTAINS] llap - File Output Operator [FS_308] - table:{"name:":"default.a_n19"} - Select Operator [SEL_305] (rows=1844 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_303] (rows=1844 width=10) - Conds:MAPJOIN_302._col1=RS_397._col0(Inner),Output:["_col1","_col4"] - <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_397] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_395] - <-Map Join Operator [MAPJOIN_302] (rows=1677 width=10) - Conds:SEL_304._col0=RS_368._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_368] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_304] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_301] (rows=500 width=10) - predicate:value is not null - TableScan [TS_298] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_317] - Group By Operator [GBY_314] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_309] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_305] - File Output Operator [FS_310] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_305] - Reduce Output Operator [RS_318] - Group By Operator [GBY_315] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_311] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_305] - File Output Operator [FS_312] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_305] - Reduce Output Operator [RS_319] - Group By Operator [GBY_316] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_313] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_305] - <-Map 20 [CONTAINS] llap - File Output Operator [FS_330] - table:{"name:":"default.a_n19"} - Select Operator [SEL_327] (rows=1844 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_325] (rows=1844 width=10) - Conds:MAPJOIN_324._col1=RS_398._col0(Inner),Output:["_col1","_col4"] - <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_398] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_395] - <-Map Join Operator [MAPJOIN_324] (rows=1677 width=10) - Conds:SEL_326._col0=RS_369._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_369] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_326] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_323] (rows=500 width=10) - predicate:value is not null - TableScan [TS_320] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_339] - Group By Operator [GBY_336] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_331] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_327] - File Output Operator [FS_332] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_327] - Reduce Output Operator [RS_340] - Group By Operator [GBY_337] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_333] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_327] - File Output Operator [FS_334] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_327] - Reduce Output Operator [RS_341] - Group By Operator [GBY_338] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_335] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_327] - <-Map 21 [CONTAINS] llap - File Output Operator [FS_352] - table:{"name:":"default.a_n19"} - Select Operator [SEL_349] (rows=1844 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_347] (rows=1844 width=10) - Conds:MAPJOIN_346._col1=RS_399._col0(Inner),Output:["_col1","_col4"] - <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_399] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_395] - <-Map Join Operator [MAPJOIN_346] (rows=1677 width=10) - Conds:SEL_348._col0=RS_370._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_370] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_348] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_345] (rows=500 width=10) - predicate:value is not null - TableScan [TS_342] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_361] - Group By Operator [GBY_358] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_353] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_349] - File Output Operator [FS_354] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_349] - Reduce Output Operator [RS_362] - Group By Operator [GBY_359] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_355] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_349] - File Output Operator [FS_356] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_349] - Reduce Output Operator [RS_363] - Group By Operator [GBY_360] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_357] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_349] - <-Reducer 2 [CONTAINS] llap - File Output Operator [FS_224] - table:{"name:":"default.a_n19"} - Select Operator [SEL_222] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_221] (rows=605 width=10) - Conds:RS_375._col3=Union 11._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_375] - PartitionCols:_col3 - Map Join Operator [MAPJOIN_373] (rows=550 width=10) - Conds:SEL_372._col0=RS_366._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_366] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_372] (rows=500 width=10) + Select Operator [SEL_81] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_80] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Union 3 [CUSTOM_SIMPLE_EDGE] + <-Map 18 [CONTAINS] llap + File Output Operator [FS_286] + table:{"name:":"default.a_n19"} + Select Operator [SEL_283] (rows=1844 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_281] (rows=1844 width=10) + Conds:MAPJOIN_280._col1=RS_396._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_396] + PartitionCols:_col0 + Select Operator [SEL_395] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_371] (rows=500 width=10) + Filter Operator [FIL_394] (rows=25 width=7) predicate:key is not null - TableScan [TS_0] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 11 [SIMPLE_EDGE] - <-Map 10 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_381] - PartitionCols:_col0 - Select Operator [SEL_380] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_379] (rows=25 width=7) - predicate:value is not null - TableScan [TS_251] (rows=25 width=7) - Output:["value"] - <-Map 12 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_384] - PartitionCols:_col0 - Select Operator [SEL_383] (rows=500 width=10) + TableScan [TS_63] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_280] (rows=1677 width=10) + Conds:SEL_282._col0=RS_367._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_367] + PartitionCols:_col1 + Select Operator [SEL_365] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_364] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_282] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_382] (rows=500 width=10) + Filter Operator [FIL_279] (rows=25 width=7) predicate:value is not null - TableScan [TS_256] (rows=500 width=10) + TableScan [TS_276] (rows=25 width=7) Output:["value"] - Reduce Output Operator [RS_233] - Group By Operator [GBY_230] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_225] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_222] - File Output Operator [FS_226] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_222] - Reduce Output Operator [RS_234] - Group By Operator [GBY_231] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_227] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_222] - File Output Operator [FS_228] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_222] - Reduce Output Operator [RS_235] - Group By Operator [GBY_232] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_229] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_222] - <-Reducer 8 [CONTAINS] llap - File Output Operator [FS_239] - table:{"name:":"default.a_n19"} - Select Operator [SEL_237] (rows=1127 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_236] (rows=1127 width=10) - Conds:RS_41._col3=Union 15._col0(Inner),Output:["_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_207] (rows=550 width=10) - Conds:RS_374._col0=RS_378._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_374] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_372] - <-Map 13 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_378] - PartitionCols:_col0 - Select Operator [SEL_377] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_376] (rows=500 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_24] (rows=500 width=10) - default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 15 [SIMPLE_EDGE] - <-Map 14 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_387] + Reduce Output Operator [RS_295] + Group By Operator [GBY_292] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_287] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_283] + File Output Operator [FS_288] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_283] + Reduce Output Operator [RS_296] + Group By Operator [GBY_293] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_289] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_283] + File Output Operator [FS_290] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_283] + Reduce Output Operator [RS_297] + Group By Operator [GBY_294] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_291] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_283] + <-Map 19 [CONTAINS] llap + File Output Operator [FS_308] + table:{"name:":"default.a_n19"} + Select Operator [SEL_305] (rows=1844 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_303] (rows=1844 width=10) + Conds:MAPJOIN_302._col1=RS_397._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_397] PartitionCols:_col0 - Select Operator [SEL_386] (rows=25 width=7) + Please refer to the previous Select Operator [SEL_395] + <-Map Join Operator [MAPJOIN_302] (rows=1677 width=10) + Conds:SEL_304._col0=RS_368._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_368] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_304] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_385] (rows=25 width=7) + Filter Operator [FIL_301] (rows=500 width=10) predicate:value is not null - TableScan [TS_261] (rows=25 width=7) + TableScan [TS_298] (rows=500 width=10) Output:["value"] - <-Map 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_390] + Reduce Output Operator [RS_317] + Group By Operator [GBY_314] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_309] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_305] + File Output Operator [FS_310] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_305] + Reduce Output Operator [RS_318] + Group By Operator [GBY_315] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_311] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_305] + File Output Operator [FS_312] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_305] + Reduce Output Operator [RS_319] + Group By Operator [GBY_316] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_313] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_305] + <-Map 20 [CONTAINS] llap + File Output Operator [FS_330] + table:{"name:":"default.a_n19"} + Select Operator [SEL_327] (rows=1844 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_325] (rows=1844 width=10) + Conds:MAPJOIN_324._col1=RS_398._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_398] PartitionCols:_col0 - Select Operator [SEL_389] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_395] + <-Map Join Operator [MAPJOIN_324] (rows=1677 width=10) + Conds:SEL_326._col0=RS_369._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_369] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_326] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_388] (rows=500 width=10) + Filter Operator [FIL_323] (rows=500 width=10) predicate:value is not null - TableScan [TS_266] (rows=500 width=10) + TableScan [TS_320] (rows=500 width=10) Output:["value"] - <-Map 17 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_393] + Reduce Output Operator [RS_339] + Group By Operator [GBY_336] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_331] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_327] + File Output Operator [FS_332] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_327] + Reduce Output Operator [RS_340] + Group By Operator [GBY_337] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_333] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_327] + File Output Operator [FS_334] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_327] + Reduce Output Operator [RS_341] + Group By Operator [GBY_338] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_335] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_327] + <-Map 21 [CONTAINS] llap + File Output Operator [FS_352] + table:{"name:":"default.a_n19"} + Select Operator [SEL_349] (rows=1844 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_347] (rows=1844 width=10) + Conds:MAPJOIN_346._col1=RS_399._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_399] PartitionCols:_col0 - Select Operator [SEL_392] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_395] + <-Map Join Operator [MAPJOIN_346] (rows=1677 width=10) + Conds:SEL_348._col0=RS_370._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_370] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_348] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_391] (rows=500 width=10) + Filter Operator [FIL_345] (rows=500 width=10) predicate:value is not null - TableScan [TS_271] (rows=500 width=10) + TableScan [TS_342] (rows=500 width=10) Output:["value"] - Reduce Output Operator [RS_248] - Group By Operator [GBY_245] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_240] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_237] - File Output Operator [FS_241] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_237] - Reduce Output Operator [RS_249] - Group By Operator [GBY_246] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_242] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_237] - File Output Operator [FS_243] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_237] - Reduce Output Operator [RS_250] - Group By Operator [GBY_247] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_244] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_237] + Reduce Output Operator [RS_361] + Group By Operator [GBY_358] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_353] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_349] + File Output Operator [FS_354] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_349] + Reduce Output Operator [RS_362] + Group By Operator [GBY_359] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_355] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_349] + File Output Operator [FS_356] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_349] + Reduce Output Operator [RS_363] + Group By Operator [GBY_360] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_357] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_349] + <-Reducer 2 [CONTAINS] llap + File Output Operator [FS_224] + table:{"name:":"default.a_n19"} + Select Operator [SEL_222] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_221] (rows=605 width=10) + Conds:RS_375._col3=Union 11._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_375] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_373] (rows=550 width=10) + Conds:SEL_372._col0=RS_366._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_366] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_372] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_371] (rows=500 width=10) + predicate:key is not null + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Union 11 [SIMPLE_EDGE] + <-Map 10 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_381] + PartitionCols:_col0 + Select Operator [SEL_380] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_379] (rows=25 width=7) + predicate:value is not null + TableScan [TS_251] (rows=25 width=7) + Output:["value"] + <-Map 12 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_384] + PartitionCols:_col0 + Select Operator [SEL_383] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_382] (rows=500 width=10) + predicate:value is not null + TableScan [TS_256] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_233] + Group By Operator [GBY_230] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_225] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_222] + File Output Operator [FS_226] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_222] + Reduce Output Operator [RS_234] + Group By Operator [GBY_231] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_227] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_222] + File Output Operator [FS_228] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_222] + Reduce Output Operator [RS_235] + Group By Operator [GBY_232] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_229] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_222] + <-Reducer 8 [CONTAINS] llap + File Output Operator [FS_239] + table:{"name:":"default.a_n19"} + Select Operator [SEL_237] (rows=1127 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_236] (rows=1127 width=10) + Conds:RS_41._col3=Union 15._col0(Inner),Output:["_col1","_col2"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_207] (rows=550 width=10) + Conds:RS_374._col0=RS_378._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_374] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_372] + <-Map 13 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_378] + PartitionCols:_col0 + Select Operator [SEL_377] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_376] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_24] (rows=500 width=10) + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Union 15 [SIMPLE_EDGE] + <-Map 14 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_387] + PartitionCols:_col0 + Select Operator [SEL_386] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_385] (rows=25 width=7) + predicate:value is not null + TableScan [TS_261] (rows=25 width=7) + Output:["value"] + <-Map 16 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_390] + PartitionCols:_col0 + Select Operator [SEL_389] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_388] (rows=500 width=10) + predicate:value is not null + TableScan [TS_266] (rows=500 width=10) + Output:["value"] + <-Map 17 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_393] + PartitionCols:_col0 + Select Operator [SEL_392] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_391] (rows=500 width=10) + predicate:value is not null + TableScan [TS_271] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_248] + Group By Operator [GBY_245] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_240] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_237] + File Output Operator [FS_241] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_237] + Reduce Output Operator [RS_249] + Group By Operator [GBY_246] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_242] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_237] + File Output Operator [FS_243] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_237] + Reduce Output Operator [RS_250] + Group By Operator [GBY_247] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_244] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_237] Reducer 5 llap File Output Operator [FS_91] - Group By Operator [GBY_89] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] + Select Operator [SEL_90] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_89] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] Reducer 6 llap File Output Operator [FS_100] - Group By Operator [GBY_98] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] + Select Operator [SEL_99] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_98] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] Stage-6 Stats Work{} Stage-1 @@ -3214,281 +3220,287 @@ Stage-5 Stage-3 Reducer 7 llap File Output Operator [FS_130] - Group By Operator [GBY_128] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_125] (rows=544 width=10) - Output:["key","value"] - Group By Operator [GBY_120] (rows=544 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 32 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_426] - PartitionCols:_col0, _col1 - Group By Operator [GBY_425] (rows=1089 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_424] (rows=484 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_423] (rows=484 width=10) - Conds:RS_360._col3=SEL_422._col0(Inner),Output:["_col1","_col2"] - <-Map 12 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_360] - PartitionCols:_col3 - Map Join Operator [MAPJOIN_359] (rows=27 width=7) - Conds:SEL_357._col0=RS_353._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 25 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_353] - PartitionCols:_col0 - Select Operator [SEL_352] (rows=25 width=7) + Select Operator [SEL_129] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_128] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_127] + Group By Operator [GBY_126] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_125] (rows=544 width=10) + Output:["key","value"] + Group By Operator [GBY_120] (rows=544 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 32 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_426] + PartitionCols:_col0, _col1 + Group By Operator [GBY_425] (rows=1089 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_424] (rows=484 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_423] (rows=484 width=10) + Conds:RS_360._col3=SEL_422._col0(Inner),Output:["_col1","_col2"] + <-Map 12 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_360] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_359] (rows=27 width=7) + Conds:SEL_357._col0=RS_353._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 25 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_353] + PartitionCols:_col0 + Select Operator [SEL_352] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_351] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_72] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_357] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_351] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_72] (rows=25 width=7) + Filter Operator [FIL_355] (rows=25 width=7) + predicate:key is not null + TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_357] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_355] (rows=25 width=7) - predicate:key is not null - TableScan [TS_3] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_422] (rows=440 width=10) - Output:["_col0"] - Group By Operator [GBY_421] (rows=440 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 31 [SIMPLE_EDGE] - <-Map 35 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_438] - PartitionCols:_col0, _col1 - Group By Operator [GBY_437] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_436] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_435] (rows=500 width=10) - predicate:value is not null - TableScan [TS_345] (rows=500 width=10) - Output:["key","value"] - <-Reducer 30 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_420] - PartitionCols:_col0, _col1 - Group By Operator [GBY_419] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_418] (rows=381 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_417] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 29 [SIMPLE_EDGE] - <-Map 34 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_434] - PartitionCols:_col0, _col1 - Group By Operator [GBY_433] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_432] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_431] (rows=500 width=10) - predicate:value is not null - TableScan [TS_339] (rows=500 width=10) - Output:["key","value"] - <-Reducer 28 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_416] - PartitionCols:_col0, _col1 - Group By Operator [GBY_415] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_414] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_413] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 27 [SIMPLE_EDGE] - <-Map 26 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_412] - PartitionCols:_col0, _col1 - Group By Operator [GBY_411] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_410] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_409] (rows=25 width=7) - predicate:value is not null - TableScan [TS_309] (rows=25 width=7) - Output:["key","value"] - <-Map 33 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_430] - PartitionCols:_col0, _col1 - Group By Operator [GBY_429] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_428] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_427] (rows=500 width=10) - predicate:value is not null - TableScan [TS_333] (rows=500 width=10) - Output:["key","value"] - <-Reducer 4 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_371] - PartitionCols:_col0, _col1 - Group By Operator [GBY_370] (rows=1089 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_369] (rows=605 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Reducer 11 [CONTAINS] llap - Reduce Output Operator [RS_273] - PartitionCols:_col0, _col1 - Group By Operator [GBY_272] (rows=1210 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_270] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_269] (rows=605 width=10) - Conds:RS_58._col3=RS_384._col0(Inner),Output:["_col1","_col2"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_58] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_252] (rows=550 width=10) - Conds:RS_364._col0=RS_381._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_364] - PartitionCols:_col0 - Select Operator [SEL_362] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_361] (rows=500 width=10) - predicate:key is not null - TableScan [TS_0] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 17 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_381] - PartitionCols:_col0 - Select Operator [SEL_380] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_379] (rows=500 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_29] (rows=500 width=10) - default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 22 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_384] - PartitionCols:_col0 - Select Operator [SEL_383] (rows=381 width=10) - Output:["_col0"] - Group By Operator [GBY_382] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 21 [SIMPLE_EDGE] - <-Map 24 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_408] - PartitionCols:_col0, _col1 - Group By Operator [GBY_407] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_406] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_405] (rows=500 width=10) - predicate:value is not null - TableScan [TS_303] (rows=500 width=10) - Output:["key","value"] - <-Reducer 20 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_400] - PartitionCols:_col0, _col1 - Group By Operator [GBY_399] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_398] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_397] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 19 [SIMPLE_EDGE] - <-Map 18 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_396] - PartitionCols:_col0, _col1 - Group By Operator [GBY_395] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_394] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_393] (rows=25 width=7) - predicate:value is not null - TableScan [TS_286] (rows=25 width=7) - Output:["key","value"] - <-Map 23 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_404] - PartitionCols:_col0, _col1 - Group By Operator [GBY_403] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_402] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_401] (rows=500 width=10) - predicate:value is not null - TableScan [TS_297] (rows=500 width=10) - Output:["key","value"] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_264] - PartitionCols:_col0, _col1 - Group By Operator [GBY_263] (rows=1210 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_261] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_260] (rows=605 width=10) - Conds:RS_365._col3=RS_368._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_365] - PartitionCols:_col3 - Map Join Operator [MAPJOIN_363] (rows=550 width=10) - Conds:SEL_362._col0=RS_358._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 12 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_358] - PartitionCols:_col0 - Select Operator [SEL_356] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_354] (rows=25 width=7) - predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_3] - Please refer to the previous Select Operator [SEL_362] - <-Reducer 15 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_368] - PartitionCols:_col0 - Select Operator [SEL_367] (rows=262 width=10) - Output:["_col0"] - Group By Operator [GBY_366] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_388] - PartitionCols:_col0, _col1 - Group By Operator [GBY_387] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_386] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_385] (rows=25 width=7) - predicate:value is not null - TableScan [TS_274] (rows=25 width=7) - Output:["key","value"] - <-Map 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_392] - PartitionCols:_col0, _col1 - Group By Operator [GBY_391] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_390] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_389] (rows=500 width=10) - predicate:value is not null - TableScan [TS_280] (rows=500 width=10) - Output:["key","value"] + <-Select Operator [SEL_422] (rows=440 width=10) + Output:["_col0"] + Group By Operator [GBY_421] (rows=440 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 31 [SIMPLE_EDGE] + <-Map 35 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_438] + PartitionCols:_col0, _col1 + Group By Operator [GBY_437] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_436] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_435] (rows=500 width=10) + predicate:value is not null + TableScan [TS_345] (rows=500 width=10) + Output:["key","value"] + <-Reducer 30 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_420] + PartitionCols:_col0, _col1 + Group By Operator [GBY_419] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_418] (rows=381 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_417] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 29 [SIMPLE_EDGE] + <-Map 34 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_434] + PartitionCols:_col0, _col1 + Group By Operator [GBY_433] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_432] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_431] (rows=500 width=10) + predicate:value is not null + TableScan [TS_339] (rows=500 width=10) + Output:["key","value"] + <-Reducer 28 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_416] + PartitionCols:_col0, _col1 + Group By Operator [GBY_415] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_414] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_413] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 27 [SIMPLE_EDGE] + <-Map 26 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_412] + PartitionCols:_col0, _col1 + Group By Operator [GBY_411] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_410] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_409] (rows=25 width=7) + predicate:value is not null + TableScan [TS_309] (rows=25 width=7) + Output:["key","value"] + <-Map 33 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_430] + PartitionCols:_col0, _col1 + Group By Operator [GBY_429] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_428] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_427] (rows=500 width=10) + predicate:value is not null + TableScan [TS_333] (rows=500 width=10) + Output:["key","value"] + <-Reducer 4 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_371] + PartitionCols:_col0, _col1 + Group By Operator [GBY_370] (rows=1089 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_369] (rows=605 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] llap + Reduce Output Operator [RS_273] + PartitionCols:_col0, _col1 + Group By Operator [GBY_272] (rows=1210 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_270] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_269] (rows=605 width=10) + Conds:RS_58._col3=RS_384._col0(Inner),Output:["_col1","_col2"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_58] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_252] (rows=550 width=10) + Conds:RS_364._col0=RS_381._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_364] + PartitionCols:_col0 + Select Operator [SEL_362] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_361] (rows=500 width=10) + predicate:key is not null + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 17 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_381] + PartitionCols:_col0 + Select Operator [SEL_380] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_379] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_29] (rows=500 width=10) + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 22 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_384] + PartitionCols:_col0 + Select Operator [SEL_383] (rows=381 width=10) + Output:["_col0"] + Group By Operator [GBY_382] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 21 [SIMPLE_EDGE] + <-Map 24 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_408] + PartitionCols:_col0, _col1 + Group By Operator [GBY_407] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_406] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_405] (rows=500 width=10) + predicate:value is not null + TableScan [TS_303] (rows=500 width=10) + Output:["key","value"] + <-Reducer 20 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_400] + PartitionCols:_col0, _col1 + Group By Operator [GBY_399] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_398] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_397] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 19 [SIMPLE_EDGE] + <-Map 18 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_396] + PartitionCols:_col0, _col1 + Group By Operator [GBY_395] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_394] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_393] (rows=25 width=7) + predicate:value is not null + TableScan [TS_286] (rows=25 width=7) + Output:["key","value"] + <-Map 23 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_404] + PartitionCols:_col0, _col1 + Group By Operator [GBY_403] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_402] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_401] (rows=500 width=10) + predicate:value is not null + TableScan [TS_297] (rows=500 width=10) + Output:["key","value"] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_264] + PartitionCols:_col0, _col1 + Group By Operator [GBY_263] (rows=1210 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_261] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_260] (rows=605 width=10) + Conds:RS_365._col3=RS_368._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_365] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_363] (rows=550 width=10) + Conds:SEL_362._col0=RS_358._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_358] + PartitionCols:_col0 + Select Operator [SEL_356] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_354] (rows=25 width=7) + predicate:(key is not null and value is not null) + Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_362] + <-Reducer 15 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_368] + PartitionCols:_col0 + Select Operator [SEL_367] (rows=262 width=10) + Output:["_col0"] + Group By Operator [GBY_366] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_388] + PartitionCols:_col0, _col1 + Group By Operator [GBY_387] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_386] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_385] (rows=25 width=7) + predicate:value is not null + TableScan [TS_274] (rows=25 width=7) + Output:["key","value"] + <-Map 16 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_392] + PartitionCols:_col0, _col1 + Group By Operator [GBY_391] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_390] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_389] (rows=500 width=10) + predicate:value is not null + TableScan [TS_280] (rows=500 width=10) + Output:["key","value"] Reducer 8 llap File Output Operator [FS_139] - Group By Operator [GBY_137] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_136] - Group By Operator [GBY_135] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_134] (rows=544 width=10) - Output:["key","value"] - Please refer to the previous Group By Operator [GBY_120] + Select Operator [SEL_138] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_137] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_136] + Group By Operator [GBY_135] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_134] (rows=544 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] Reducer 9 llap File Output Operator [FS_148] - Group By Operator [GBY_146] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_145] - Group By Operator [GBY_144] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_143] (rows=544 width=10) - Output:["key","value"] - Please refer to the previous Group By Operator [GBY_120] + Select Operator [SEL_147] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_146] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_145] + Group By Operator [GBY_144] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_143] (rows=544 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] Stage-6 Stats Work{} Stage-1 @@ -3558,69 +3570,73 @@ Stage-4 Stage-2 Reducer 6 llap File Output Operator [FS_28] - Group By Operator [GBY_26] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_21] - table:{"name:":"default.dest1_n172"} - Select Operator [SEL_19] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_18] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Group By Operator [GBY_13] (rows=1 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_64] - PartitionCols:_col0, _col1 - Group By Operator [GBY_63] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_62] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_48] (rows=500 width=10) - Output:["key","value"] - <-Reducer 2 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_59] - PartitionCols:_col0, _col1 - Group By Operator [GBY_58] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_57] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_56] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_55] - Group By Operator [GBY_54] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_53] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - PARTITION_ONLY_SHUFFLE [RS_25] - Group By Operator [GBY_24] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_23] (rows=1 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_19] + Select Operator [SEL_27] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_26] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_21] + table:{"name:":"default.dest1_n172"} + Select Operator [SEL_19] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_18] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Group By Operator [GBY_13] (rows=1 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_64] + PartitionCols:_col0, _col1 + Group By Operator [GBY_63] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_62] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_48] (rows=500 width=10) + Output:["key","value"] + <-Reducer 2 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_59] + PartitionCols:_col0, _col1 + Group By Operator [GBY_58] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_57] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_56] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap + PARTITION_ONLY_SHUFFLE [RS_55] + Group By Operator [GBY_54] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_53] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + PARTITION_ONLY_SHUFFLE [RS_25] + Group By Operator [GBY_24] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_23] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_19] Reducer 7 llap File Output Operator [FS_42] - Group By Operator [GBY_40] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_39] - Group By Operator [GBY_38] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] - Select Operator [SEL_37] (rows=1 width=456) - Output:["key","val1","val2"] - Select Operator [SEL_33] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_32] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_13] + Select Operator [SEL_41] (rows=1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_40] (rows=1 width=500) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector(VALUE._col12)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_39] + Group By Operator [GBY_38] (rows=1 width=704) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(val1))","avg(COALESCE(length(val1),0))","count(val1)","compute_bit_vector(val1, 'hll')","max(length(val2))","avg(COALESCE(length(val2),0))","count(val2)","compute_bit_vector(val2, 'hll')"] + Select Operator [SEL_37] (rows=1 width=456) + Output:["key","val1","val2"] + Select Operator [SEL_33] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_32] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_13] Stage-5 Stats Work{} Stage-1 @@ -3780,89 +3796,93 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_25] - Group By Operator [GBY_23] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_18] - table:{"name:":"default.dest1_n172"} - Select Operator [SEL_16] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_73] - PartitionCols:_col0 - Group By Operator [GBY_71] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_70] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_47] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_74] - PartitionCols:_col0, _col1 - Group By Operator [GBY_72] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_70] - <-Map 9 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_78] - PartitionCols:_col0 - Group By Operator [GBY_76] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_75] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_54] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_79] - PartitionCols:_col0, _col1 - Group By Operator [GBY_77] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_75] - <-Reducer 2 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_68] - PartitionCols:_col0 - Group By Operator [GBY_66] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_65] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_64] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_63] - Group By Operator [GBY_62] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_61] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_69] - PartitionCols:_col0, _col1 - Group By Operator [GBY_67] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_65] - PARTITION_ONLY_SHUFFLE [RS_22] - Group By Operator [GBY_21] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_20] (rows=1 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_16] + Select Operator [SEL_24] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_23] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_18] + table:{"name:":"default.dest1_n172"} + Select Operator [SEL_16] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_15] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_73] + PartitionCols:_col0 + Group By Operator [GBY_71] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_70] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_47] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_74] + PartitionCols:_col0, _col1 + Group By Operator [GBY_72] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_70] + <-Map 9 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_78] + PartitionCols:_col0 + Group By Operator [GBY_76] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_75] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_54] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_79] + PartitionCols:_col0, _col1 + Group By Operator [GBY_77] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_75] + <-Reducer 2 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_66] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_65] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_64] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap + PARTITION_ONLY_SHUFFLE [RS_63] + Group By Operator [GBY_62] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_61] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_69] + PartitionCols:_col0, _col1 + Group By Operator [GBY_67] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_65] + PARTITION_ONLY_SHUFFLE [RS_22] + Group By Operator [GBY_21] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_20] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] Reducer 7 llap File Output Operator [FS_39] - Group By Operator [GBY_37] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_32] - table:{"name:":"default.dest2_n43"} - Select Operator [SEL_30] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_29] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_36] - Group By Operator [GBY_35] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] - Select Operator [SEL_34] (rows=1 width=456) - Output:["key","val1","val2"] - Please refer to the previous Select Operator [SEL_30] + Select Operator [SEL_38] (rows=1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_37] (rows=1 width=500) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector(VALUE._col12)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_32] + table:{"name:":"default.dest2_n43"} + Select Operator [SEL_30] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_29] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_36] + Group By Operator [GBY_35] (rows=1 width=704) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(val1))","avg(COALESCE(length(val1),0))","count(val1)","compute_bit_vector(val1, 'hll')","max(length(val2))","avg(COALESCE(length(val2),0))","count(val2)","compute_bit_vector(val2, 'hll')"] + Select Operator [SEL_34] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_30] Stage-5 Stats Work{} Stage-1 @@ -3916,75 +3936,79 @@ Stage-4 Stage-2 Reducer 5 llap File Output Operator [FS_23] - Group By Operator [GBY_21] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_16] - table:{"name:":"default.dest1_n172"} - Select Operator [SEL_14] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_13] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_64] - PartitionCols:_col0 - Group By Operator [GBY_62] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_61] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_45] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_65] - PartitionCols:_col0, _col1 - Group By Operator [GBY_63] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_61] - <-Reducer 2 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_59] - PartitionCols:_col0 - Group By Operator [GBY_57] (rows=1 width=275) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_56] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_55] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_54] - Group By Operator [GBY_53] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_52] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_60] - PartitionCols:_col0, _col1 - Group By Operator [GBY_58] (rows=1 width=459) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_56] - PARTITION_ONLY_SHUFFLE [RS_20] - Group By Operator [GBY_19] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_18] (rows=1 width=272) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_14] + Select Operator [SEL_22] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_21] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_16] + table:{"name:":"default.dest1_n172"} + Select Operator [SEL_14] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_13] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_64] + PartitionCols:_col0 + Group By Operator [GBY_62] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_61] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_45] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_65] + PartitionCols:_col0, _col1 + Group By Operator [GBY_63] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_61] + <-Reducer 2 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_59] + PartitionCols:_col0 + Group By Operator [GBY_57] (rows=1 width=275) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_56] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_55] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap + PARTITION_ONLY_SHUFFLE [RS_54] + Group By Operator [GBY_53] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_52] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_60] + PartitionCols:_col0, _col1 + Group By Operator [GBY_58] (rows=1 width=459) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_56] + PARTITION_ONLY_SHUFFLE [RS_20] + Group By Operator [GBY_19] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_18] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_14] Reducer 7 llap File Output Operator [FS_37] - Group By Operator [GBY_35] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_30] - table:{"name:":"default.dest2_n43"} - Select Operator [SEL_28] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_27] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_34] - Group By Operator [GBY_33] (rows=1 width=1320) - Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] - Select Operator [SEL_32] (rows=1 width=456) - Output:["key","val1","val2"] - Please refer to the previous Select Operator [SEL_28] + Select Operator [SEL_36] (rows=1 width=798) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_35] (rows=1 width=500) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector(VALUE._col12)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_30] + table:{"name:":"default.dest2_n43"} + Select Operator [SEL_28] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_27] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_34] + Group By Operator [GBY_33] (rows=1 width=704) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(val1))","avg(COALESCE(length(val1),0))","count(val1)","compute_bit_vector(val1, 'hll')","max(length(val2))","avg(COALESCE(length(val2),0))","count(val2)","compute_bit_vector(val2, 'hll')"] + Select Operator [SEL_32] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_28] Stage-5 Stats Work{} Stage-1 diff --git a/ql/src/test/results/clientpositive/llap/fm-sketch.q.out b/ql/src/test/results/clientpositive/llap/fm-sketch.q.out index f77a6db39a..4b8e3f83ea 100644 --- a/ql/src/test/results/clientpositive/llap/fm-sketch.q.out +++ b/ql/src/test/results/clientpositive/llap/fm-sketch.q.out @@ -47,33 +47,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'fm', 16) + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'fm', 16) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -161,33 +165,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'fm', 16) + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'fm', 16) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/groupby10.q.out b/ql/src/test/results/clientpositive/llap/groupby10.q.out index d92bcb3d80..45c16b9add 100644 --- a/ql/src/test/results/clientpositive/llap/groupby10.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby10.q.out @@ -147,30 +147,34 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col4), max(VALUE._col4), count(VALUE._col4), compute_bit_vector(VALUE._col4, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -222,30 +226,34 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col4), max(VALUE._col4), count(VALUE._col4), compute_bit_vector(VALUE._col4, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -479,30 +487,34 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col4), max(VALUE._col4), count(VALUE._col4), compute_bit_vector(VALUE._col4, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -554,30 +566,34 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col4), max(VALUE._col4), count(VALUE._col4), compute_bit_vector(VALUE._col4, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -808,32 +824,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col4), max(VALUE._col4), count(VALUE._col4), compute_bit_vector(VALUE._col4, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col4), max(VALUE._col4), count(VALUE._col4), compute_bit_vector(VALUE._col4, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 680 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby11.q.out b/ql/src/test/results/clientpositive/llap/groupby11.q.out index 01c23f715e..380270082b 100644 --- a/ql/src/test/results/clientpositive/llap/groupby11.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby11.q.out @@ -131,39 +131,39 @@ STAGE PLANS: sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 307 Data size: 57102 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), val1 (type: int), val2 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), key (type: string), val1 (type: int), val2 (type: int) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll'), min(VALUE._col6), max(VALUE._col6), count(VALUE._col6), compute_bit_vector(VALUE._col6, 'hll') keys: '111' (type: string) mode: partial1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 307 Data size: 198629 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '111' (type: string) null sort order: z sort order: + Map-reduce partition columns: '111' (type: string) - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 307 Data size: 198629 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: '111' (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 579 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -215,39 +215,39 @@ STAGE PLANS: sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), val1 (type: int), val2 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), key (type: string), val1 (type: int), val2 (type: int) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll'), min(VALUE._col6), max(VALUE._col6), count(VALUE._col6), compute_bit_vector(VALUE._col6, 'hll') keys: '111' (type: string) mode: partial1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 307 Data size: 198629 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '111' (type: string) null sort order: z sort order: + Map-reduce partition columns: '111' (type: string) - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 307 Data size: 198629 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: '111' (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 579 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/groupby12.q.out b/ql/src/test/results/clientpositive/llap/groupby12.q.out index add92ca3af..b56c5f6199 100644 --- a/ql/src/test/results/clientpositive/llap/groupby12.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby12.q.out @@ -79,22 +79,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby1_map.q.out b/ql/src/test/results/clientpositive/llap/groupby1_map.q.out index 77d547ff41..b197b2555c 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1_map.q.out @@ -82,31 +82,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out index 3a42667d2d..2adc64e7df 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out @@ -82,31 +82,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out index 367d27e32b..1f08ba492e 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out @@ -99,31 +99,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out index da2ba5a2b6..c827d5fd16 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out @@ -83,17 +83,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), min(VALUE._col3), max(VALUE._col3), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2.q.out b/ql/src/test/results/clientpositive/llap/groupby2.q.out index bdad787edb..dc2bf35c08 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2.q.out @@ -98,35 +98,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), c1 (type: int), c2 (type: string) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), key (type: string), c1 (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), c2 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col6), avg(VALUE._col7), count(VALUE._col8), compute_bit_vector(VALUE._col8, 'hll') mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_map.q.out b/ql/src/test/results/clientpositive/llap/groupby2_map.q.out index 67684ced33..88ba4695cf 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_map.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, c1, c2 Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out index e9f8ec6224..4a67451765 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -242,31 +246,35 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out index 9b87d90a7a..664143ab12 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out @@ -101,31 +101,35 @@ STAGE PLANS: outputColumnNames: key, c1, c2 Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out index 2e8481d485..5199a48723 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out @@ -79,22 +79,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), c1 (type: int), c2 (type: string) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), key (type: string), c1 (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), c2 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col6), avg(VALUE._col7), count(VALUE._col8), compute_bit_vector(VALUE._col8, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out index c4fce00adf..57039f4796 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out @@ -80,22 +80,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), key (type: string), c1 (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), c2 (type: string), c3 (type: int), c4 (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col6), avg(VALUE._col7), count(VALUE._col8), compute_bit_vector(VALUE._col8, 'hll'), min(VALUE._col9), max(VALUE._col9), count(VALUE._col9), compute_bit_vector(VALUE._col9, 'hll'), min(VALUE._col10), max(VALUE._col10), count(VALUE._col10), compute_bit_vector(VALUE._col10, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3.q.out b/ql/src/test/results/clientpositive/llap/groupby3.q.out index 7e05e27229..d256ed9e38 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3.q.out @@ -108,30 +108,34 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(c6), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(c7), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(c8), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(c9), compute_bit_vector(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36 + Statistics: Num rows: 1 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: binary), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: binary), _col21 (type: double), _col22 (type: double), _col23 (type: bigint), _col24 (type: binary), _col25 (type: double), _col26 (type: double), _col27 (type: bigint), _col28 (type: binary), _col29 (type: double), _col30 (type: double), _col31 (type: bigint), _col32 (type: binary), _col33 (type: double), _col34 (type: double), _col35 (type: bigint), _col36 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24), min(VALUE._col25), max(VALUE._col26), count(VALUE._col27), compute_bit_vector(VALUE._col28), min(VALUE._col29), max(VALUE._col30), count(VALUE._col31), compute_bit_vector(VALUE._col32), min(VALUE._col33), max(VALUE._col34), count(VALUE._col35), compute_bit_vector(VALUE._col36) mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36 + Statistics: Num rows: 1 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'DOUBLE' (type: string), _col17 (type: double), _col18 (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'DOUBLE' (type: string), _col29 (type: double), _col30 (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'DOUBLE' (type: string), _col33 (type: double), _col34 (type: double), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_map.q.out b/ql/src/test/results/clientpositive/llap/groupby3_map.q.out index 93972862f3..1a12fb911f 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_map.q.out @@ -99,17 +99,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(c6), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(c7), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(c8), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(c9), compute_bit_vector(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36 + Statistics: Num rows: 1 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'DOUBLE' (type: string), _col17 (type: double), _col18 (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'DOUBLE' (type: string), _col29 (type: double), _col30 (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'DOUBLE' (type: string), _col33 (type: double), _col34 (type: double), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out index 25526614bb..701b2d97fe 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out @@ -103,17 +103,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(c6), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(c7), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(c8), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(c9), compute_bit_vector(c9, 'hll'), min(c10), max(c10), count(c10), compute_bit_vector(c10, 'hll'), min(c11), max(c11), count(c11), compute_bit_vector(c11, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'DOUBLE' (type: string), _col17 (type: double), _col18 (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'DOUBLE' (type: string), _col29 (type: double), _col30 (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'DOUBLE' (type: string), _col33 (type: double), _col34 (type: double), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary), 'DOUBLE' (type: string), _col37 (type: double), _col38 (type: double), (_col2 - _col39) (type: bigint), COALESCE(ndv_compute_bit_vector(_col40),0) (type: bigint), _col40 (type: binary), 'DOUBLE' (type: string), _col41 (type: double), _col42 (type: double), (_col2 - _col43) (type: bigint), COALESCE(ndv_compute_bit_vector(_col44),0) (type: bigint), _col44 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65 + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out index 761cea91a6..706315d9c3 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out @@ -114,17 +114,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(c6), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(c7), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(c8), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(c9), compute_bit_vector(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36 + Statistics: Num rows: 1 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'DOUBLE' (type: string), _col17 (type: double), _col18 (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'DOUBLE' (type: string), _col29 (type: double), _col30 (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'DOUBLE' (type: string), _col33 (type: double), _col34 (type: double), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out index 0f3e3c1fc1..bc1e18bc10 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out @@ -92,17 +92,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(c6), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(c7), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(c8), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(c9), compute_bit_vector(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36 + Statistics: Num rows: 1 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'DOUBLE' (type: string), _col17 (type: double), _col18 (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'DOUBLE' (type: string), _col29 (type: double), _col30 (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'DOUBLE' (type: string), _col33 (type: double), _col34 (type: double), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53 + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2394 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out index 097dfb9c25..a446153ef7 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out @@ -96,17 +96,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), min(c6), max(c6), count(c6), compute_bit_vector(c6, 'hll'), min(c7), max(c7), count(c7), compute_bit_vector(c7, 'hll'), min(c8), max(c8), count(c8), compute_bit_vector(c8, 'hll'), min(c9), max(c9), count(c9), compute_bit_vector(c9, 'hll'), min(c10), max(c10), count(c10), compute_bit_vector(c10, 'hll'), min(c11), max(c11), count(c11), compute_bit_vector(c11, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'DOUBLE' (type: string), _col17 (type: double), _col18 (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'DOUBLE' (type: string), _col29 (type: double), _col30 (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'DOUBLE' (type: string), _col33 (type: double), _col34 (type: double), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary), 'DOUBLE' (type: string), _col37 (type: double), _col38 (type: double), (_col2 - _col39) (type: bigint), COALESCE(ndv_compute_bit_vector(_col40),0) (type: bigint), _col40 (type: binary), 'DOUBLE' (type: string), _col41 (type: double), _col42 (type: double), (_col2 - _col43) (type: bigint), COALESCE(ndv_compute_bit_vector(_col44),0) (type: bigint), _col44 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65 + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2926 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby4.q.out b/ql/src/test/results/clientpositive/llap/groupby4.q.out index c0492e4c33..2c71fcafcc 100644 --- a/ql/src/test/results/clientpositive/llap/groupby4.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby4.q.out @@ -91,35 +91,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), c1 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll') mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby4_map.q.out b/ql/src/test/results/clientpositive/llap/groupby4_map.q.out index dc5c5eb7e2..1922fe03d1 100644 --- a/ql/src/test/results/clientpositive/llap/groupby4_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby4_map.q.out @@ -75,17 +75,21 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out index 14cd4909e7..4b4b4dd0d8 100644 --- a/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out @@ -75,17 +75,21 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out index 14b97c4733..dc48ccfb62 100644 --- a/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out @@ -74,22 +74,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), c1 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby5.q.out b/ql/src/test/results/clientpositive/llap/groupby5.q.out index 2405264a37..aba0a5ae41 100644 --- a/ql/src/test/results/clientpositive/llap/groupby5.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby5.q.out @@ -103,35 +103,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby5_map.q.out b/ql/src/test/results/clientpositive/llap/groupby5_map.q.out index aa6b0908cd..fa63991f81 100644 --- a/ql/src/test/results/clientpositive/llap/groupby5_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby5_map.q.out @@ -77,17 +77,21 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out index c21e2de3df..f858375ddc 100644 --- a/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out @@ -77,17 +77,21 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out index fe2dbffa5a..72b9258d95 100644 --- a/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out @@ -84,22 +84,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby6.q.out b/ql/src/test/results/clientpositive/llap/groupby6.q.out index 0e45e045b3..401ef309ba 100644 --- a/ql/src/test/results/clientpositive/llap/groupby6.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby6.q.out @@ -91,35 +91,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), c1 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll') mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby6_map.q.out b/ql/src/test/results/clientpositive/llap/groupby6_map.q.out index 13e38e6304..0cd451b85b 100644 --- a/ql/src/test/results/clientpositive/llap/groupby6_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby6_map.q.out @@ -77,31 +77,35 @@ STAGE PLANS: outputColumnNames: c1 Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out index c07bc8ae26..9f4a51614e 100644 --- a/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out @@ -92,31 +92,35 @@ STAGE PLANS: outputColumnNames: c1 Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out index 18e5c756db..49b622d172 100644 --- a/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out @@ -74,22 +74,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), c1 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_map.q.out b/ql/src/test/results/clientpositive/llap/groupby7_map.q.out index a37e6cf957..9c1871170c 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_map.q.out @@ -118,31 +118,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -169,31 +173,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out index f2cd96aa1a..1caeb84726 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out @@ -94,16 +94,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -127,46 +127,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out index 781547ed0f..7e6df17ae8 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out @@ -136,31 +136,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -203,31 +207,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out index 8346f49a2d..f28cc22fa1 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out @@ -107,22 +107,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -152,22 +156,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out index 5dc188d761..05ac6369ce 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out @@ -135,22 +135,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -181,22 +185,26 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby8.q.out b/ql/src/test/results/clientpositive/llap/groupby8.q.out index a008928c9c..0db52ebc83 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby8.q.out @@ -126,35 +126,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -201,35 +205,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1034,35 +1042,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1109,35 +1121,39 @@ STAGE PLANS: sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby8_map.q.out b/ql/src/test/results/clientpositive/llap/groupby8_map.q.out index 7a62ab1963..552b8a8016 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8_map.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby8_map.q.out @@ -93,16 +93,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -126,46 +126,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out index 8668593a92..de11804420 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out @@ -134,31 +134,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -201,31 +205,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out index d3a55423c5..a6d36739c4 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out @@ -96,7 +96,7 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -123,37 +123,45 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) + value expressions: key (type: int), length(value) (type: int), COALESCE(length(value),0) (type: int), value (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + aggregations: min(VALUE._col0), max(VALUE._col0), count(1), count(VALUE._col0), compute_bit_vector(VALUE._col0, 'hll'), max(VALUE._col3), avg(VALUE._col4), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby9.q.out b/ql/src/test/results/clientpositive/llap/groupby9.q.out index 0ef88e06ef..49ade3f726 100644 --- a/ql/src/test/results/clientpositive/llap/groupby9.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby9.q.out @@ -116,31 +116,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -167,31 +171,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -987,31 +995,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1038,31 +1050,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1858,31 +1874,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1909,31 +1929,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2731,31 +2755,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2782,31 +2810,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -3602,31 +3634,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -3653,31 +3689,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/groupby_cube1.q.out index f166ee6784..f911e7ac5a 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_cube1.q.out @@ -740,31 +740,35 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(key2), compute_bit_vector(key2, 'hll'), min(val), max(val), count(val), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -808,31 +812,35 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(key2), compute_bit_vector(key2, 'hll'), min(val), max(val), count(val), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_cube_multi_gby.q.out b/ql/src/test/results/clientpositive/llap/groupby_cube_multi_gby.q.out index e742242791..aa2881db47 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_cube_multi_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_cube_multi_gby.q.out @@ -119,31 +119,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 271000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -166,31 +170,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_duplicate_key.q.out b/ql/src/test/results/clientpositive/llap/groupby_duplicate_key.q.out index eba89d8da5..2ae25e3e78 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_duplicate_key.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_duplicate_key.q.out @@ -192,17 +192,21 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 250 Data size: 64250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(col3), compute_bit_vector(col3, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/llap/groupby_map_ppr.q.out index 1f8d703728..e85c6ecba8 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_map_ppr.q.out @@ -203,53 +203,57 @@ STAGE PLANS: outputColumnNames: key, c1, c2 Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby_map_ppr_multi_distinct.q.out index 8f93eb0dcf..9ee01dadb2 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_map_ppr_multi_distinct.q.out @@ -203,53 +203,57 @@ STAGE PLANS: outputColumnNames: key, c1, c2, c3, c4 Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), min(c4), max(c4), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_multi_insert_common_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby_multi_insert_common_distinct.q.out index aec09dec3c..bf1dc552c5 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_multi_insert_common_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_multi_insert_common_distinct.q.out @@ -116,31 +116,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -167,31 +171,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer2.q.out index e52dafd8b8..3bc506a15e 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer2.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer2.q.out @@ -97,16 +97,16 @@ STAGE PLANS: outputColumnNames: key, c1 Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE @@ -133,46 +133,54 @@ STAGE PLANS: outputColumnNames: key, c1, c2 Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer3.q.out index 4d4e87c3ad..3a1c5e333a 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_multi_single_reducer3.q.out @@ -109,16 +109,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -145,46 +145,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -369,16 +377,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -405,46 +413,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -628,16 +644,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -664,46 +680,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -887,16 +911,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -923,46 +947,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_nocolumnalign.q.out b/ql/src/test/results/clientpositive/llap/groupby_nocolumnalign.q.out index 77c888fd92..cd3c97e3be 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_nocolumnalign.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_nocolumnalign.q.out @@ -87,31 +87,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_position.q.out b/ql/src/test/results/clientpositive/llap/groupby_position.q.out index 1d6ed69bed..8213d0c310 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_position.q.out @@ -114,31 +114,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 83 Data size: 15604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -165,31 +169,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -391,31 +399,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 83 Data size: 15604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -442,31 +454,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_ppr.q.out b/ql/src/test/results/clientpositive/llap/groupby_ppr.q.out index 9b857193a2..773d6e0a12 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_ppr.q.out @@ -201,41 +201,45 @@ STAGE PLANS: sort order: Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: key (type: string), c1 (type: int), c2 (type: string) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), key (type: string), c1 (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), c2 (type: string) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col6), avg(VALUE._col7), count(VALUE._col8), compute_bit_vector(VALUE._col8, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out index 0f12ff8348..1c2293b6e4 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out @@ -201,41 +201,45 @@ STAGE PLANS: sort order: Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), key (type: string), c1 (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), c2 (type: string), c3 (type: int), c4 (type: int) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col6), avg(VALUE._col7), count(VALUE._col8), compute_bit_vector(VALUE._col8, 'hll'), min(VALUE._col9), max(VALUE._col9), count(VALUE._col9), compute_bit_vector(VALUE._col9, 'hll'), min(VALUE._col10), max(VALUE._col10), count(VALUE._col10), compute_bit_vector(VALUE._col10, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -505,36 +509,36 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + value expressions: length(key) (type: int), COALESCE(length(key),0) (type: int), key (type: string), c1 (type: int), length(c2) (type: int), COALESCE(length(c2),0) (type: int), c2 (type: string), c3 (type: int), c4 (type: int) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll'), min(VALUE._col5), max(VALUE._col5), count(VALUE._col5), compute_bit_vector(VALUE._col5, 'hll'), max(VALUE._col6), avg(VALUE._col7), count(VALUE._col8), compute_bit_vector(VALUE._col8, 'hll'), min(VALUE._col9), max(VALUE._col9), count(VALUE._col9), compute_bit_vector(VALUE._col9, 'hll'), min(VALUE._col10), max(VALUE._col10), count(VALUE._col10), compute_bit_vector(VALUE._col10, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/llap/groupby_rollup1.q.out index 675f58eeeb..f6a53aac02 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_rollup1.q.out @@ -553,31 +553,35 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(key2), compute_bit_vector(key2, 'hll'), min(val), max(val), count(val), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -621,31 +625,35 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(key2), compute_bit_vector(key2, 'hll'), min(val), max(val), count(val), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_1_23.q.out index 4228454958..ccf2c153b0 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_1_23.q.out @@ -114,19 +114,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -177,34 +177,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -420,53 +424,57 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(key2), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -609,19 +617,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -672,34 +680,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -840,19 +852,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -903,34 +915,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1079,19 +1095,19 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1142,34 +1158,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1386,53 +1406,57 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2128 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2128 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1643,53 +1667,57 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1912,53 +1940,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2117,19 +2149,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2226,19 +2258,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2289,34 +2321,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Union 2 Vertex: Union 2 @@ -2486,19 +2522,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2618,37 +2654,41 @@ STAGE PLANS: /t1_n80 [t1_n80] Reducer 3 Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 5 Execution mode: llap Needs Tagging: false @@ -2698,19 +2738,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Union 2 Vertex: Union 2 @@ -2949,19 +2989,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 607 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap Path -> Alias: @@ -3010,34 +3050,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3523,53 +3567,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3710,19 +3758,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -3773,34 +3821,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3952,19 +4004,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, key4, cnt Statistics: Num rows: 6 Data size: 612 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(key4, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(key4), max(key4), count(key4), compute_bit_vector(key4, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4015,34 +4067,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4193,19 +4249,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4256,34 +4312,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4441,19 +4501,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4504,34 +4564,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4706,16 +4770,16 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -4744,46 +4808,54 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -4959,16 +5031,16 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -4997,46 +5069,54 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_2.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_2.q.out index 7b01cc8e1d..1ec43ab8aa 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_2.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_2.q.out @@ -113,31 +113,35 @@ STAGE PLANS: outputColumnNames: val, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(val)), avg(COALESCE(length(val),0)), count(1), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_3.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_3.q.out index 0c34c3dc5b..2da269c788 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_3.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_3.q.out @@ -92,33 +92,37 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -234,33 +238,37 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_4.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_4.q.out index 656b2547ad..bbc5dda40e 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_4.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_4.q.out @@ -113,31 +113,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -271,31 +275,35 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_5.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_5.q.out index cbb258d66e..dda679dc02 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_5.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_5.q.out @@ -92,33 +92,37 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -262,33 +266,37 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -461,31 +469,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_6.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_6.q.out index 08d94dc5c5..e9948143f2 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_6.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_6.q.out @@ -125,53 +125,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -347,53 +351,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -599,53 +607,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_7.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_7.q.out index 5e785ffeb5..05314d47c8 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_7.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_7.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out index 0e528ffa37..e0a465fe34 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out @@ -114,19 +114,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -177,34 +177,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -442,53 +446,57 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(key2), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -631,19 +639,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -694,34 +702,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -862,19 +874,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -925,34 +937,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1101,19 +1117,19 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1164,34 +1180,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 676 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1430,53 +1450,57 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2128 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2128 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1709,53 +1733,57 @@ STAGE PLANS: outputColumnNames: key1, key2, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2000,53 +2028,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2205,19 +2237,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2314,19 +2346,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2377,34 +2409,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Union 2 Vertex: Union 2 @@ -2575,19 +2611,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -2707,37 +2743,41 @@ STAGE PLANS: /t1_n56 [t1_n56] Reducer 3 Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 5 Execution mode: vectorized, llap Needs Tagging: false @@ -2808,19 +2848,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Union 2 Vertex: Union 2 @@ -3059,19 +3099,19 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 607 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap Path -> Alias: @@ -3120,34 +3160,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3677,53 +3721,57 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3864,19 +3912,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -3927,34 +3975,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4106,19 +4158,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, key4, cnt Statistics: Num rows: 6 Data size: 612 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(key4, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(key4), max(key4), count(key4), compute_bit_vector(key4, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4169,34 +4221,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4347,19 +4403,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4410,34 +4466,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4595,19 +4655,19 @@ STAGE PLANS: outputColumnNames: key1, key2, key3, cnt Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(key3)), avg(COALESCE(length(key3),0)), count(key3), compute_bit_vector(key3, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -4658,34 +4718,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -4861,16 +4925,16 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -4915,46 +4979,54 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -5131,16 +5203,16 @@ STAGE PLANS: outputColumnNames: key, val, cnt Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val)), avg(COALESCE(length(val),0)), count(val), compute_bit_vector(val, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -5185,46 +5257,54 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_test_1.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_test_1.q.out index a1ad55a9fb..6e345dadbd 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_sort_test_1.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_test_1.q.out @@ -92,33 +92,37 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1032 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/hll.q.out b/ql/src/test/results/clientpositive/llap/hll.q.out index 0f2e13b1b2..4c9e0d38ce 100644 --- a/ql/src/test/results/clientpositive/llap/hll.q.out +++ b/ql/src/test/results/clientpositive/llap/hll.q.out @@ -47,33 +47,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -161,33 +165,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/implicit_cast_during_insert.q.out b/ql/src/test/results/clientpositive/llap/implicit_cast_during_insert.q.out index c62a816b7a..ef11f0e3ad 100644 --- a/ql/src/test/results/clientpositive/llap/implicit_cast_during_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/implicit_cast_during_insert.q.out @@ -77,18 +77,18 @@ STAGE PLANS: outputColumnNames: c1, c2, p1 Statistics: Num rows: 3 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll') keys: p1 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 419 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out index dc5ce6b965..84dcaeb291 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out @@ -484,35 +484,35 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 316 Data size: 142516 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_grouping_operators.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_grouping_operators.q.out index 0b6952b861..883084f69d 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_grouping_operators.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_grouping_operators.q.out @@ -91,35 +91,35 @@ STAGE PLANS: outputColumnNames: key, value, agg, part Statistics: Num rows: 750 Data size: 335250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(agg)), avg(COALESCE(length(agg),0)), count(agg), compute_bit_vector(agg, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1583,35 +1583,35 @@ STAGE PLANS: outputColumnNames: key, value, agg, part Statistics: Num rows: 1000 Data size: 447000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(agg)), avg(COALESCE(length(agg),0)), count(agg), compute_bit_vector(agg, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1821,35 +1821,35 @@ STAGE PLANS: outputColumnNames: key, value, agg, part Statistics: Num rows: 500 Data size: 223500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(agg)), avg(COALESCE(length(agg),0)), count(agg), compute_bit_vector(agg, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_map_operators.q.out index b6c7644a59..907582d427 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_map_operators.q.out @@ -102,37 +102,37 @@ STAGE PLANS: outputColumnNames: key, value, part Statistics: Num rows: 316 Data size: 112496 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 421 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -328,35 +328,35 @@ STAGE PLANS: outputColumnNames: key, value, part Statistics: Num rows: 500 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: part (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 421 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -515,36 +515,36 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 208033 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '1' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '1' (type: string) null sort order: z sort order: + Map-reduce partition columns: '1' (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '1' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 421 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -720,35 +720,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 307 Data size: 110520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '1' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '1' (type: string) null sort order: z sort order: + Map-reduce partition columns: '1' (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '1' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 421 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_num_buckets.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_num_buckets.q.out index bab2ac51a9..d2e0a80396 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_num_buckets.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_num_buckets.q.out @@ -161,12 +161,12 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 1000 Data size: 373000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '2008-04-08' (type: string), hr (type: string) minReductionHashAggr: 0.684 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 360872 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 316 Data size: 214248 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: '2008-04-08' (type: string), _col1 (type: string) @@ -174,9 +174,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) - Statistics: Num rows: 316 Data size: 360872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 214248 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: false Reduce Output Operator bucketingVersion: 2 @@ -194,30 +194,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '2008-04-08' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 316 Data size: 192760 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 316 Data size: 255328 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 255328 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/innerjoin.q.out b/ql/src/test/results/clientpositive/llap/innerjoin.q.out index 5355097c9c..f68d6c43d4 100644 --- a/ql/src/test/results/clientpositive/llap/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/innerjoin.q.out @@ -104,31 +104,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input11.q.out b/ql/src/test/results/clientpositive/llap/input11.q.out index 6b6415f375..c9fff07494 100644 --- a/ql/src/test/results/clientpositive/llap/input11.q.out +++ b/ql/src/test/results/clientpositive/llap/input11.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input12.q.out b/ql/src/test/results/clientpositive/llap/input12.q.out index 66ac09aa05..ccac4114e7 100644 --- a/ql/src/test/results/clientpositive/llap/input12.q.out +++ b/ql/src/test/results/clientpositive/llap/input12.q.out @@ -87,16 +87,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -117,16 +117,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 5225 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (key >= 200) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE @@ -147,67 +147,75 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 166 Data size: 30544 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') keys: '2008-04-08' (type: string), '12' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-04-08' (type: string), '12' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: '2008-04-08' (type: string), '12' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/input13.q.out b/ql/src/test/results/clientpositive/llap/input13.q.out index 62b0e61daf..57ddec3db9 100644 --- a/ql/src/test/results/clientpositive/llap/input13.q.out +++ b/ql/src/test/results/clientpositive/llap/input13.q.out @@ -92,16 +92,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -122,16 +122,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 5225 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key >= 200) and (key < 300)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -152,19 +152,19 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 55 Data size: 10120 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') keys: '2008-04-08' (type: string), '12' (type: string) minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-04-08' (type: string), '12' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary) Filter Operator predicate: (key >= 300) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE @@ -185,48 +185,56 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: '2008-04-08' (type: string), '12' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/input14.q.out b/ql/src/test/results/clientpositive/llap/input14.q.out index f3e5526fb8..a9458523f9 100644 --- a/ql/src/test/results/clientpositive/llap/input14.q.out +++ b/ql/src/test/results/clientpositive/llap/input14.q.out @@ -89,31 +89,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input17.q.out b/ql/src/test/results/clientpositive/llap/input17.q.out index 307403980c..c285f8e668 100644 --- a/ql/src/test/results/clientpositive/llap/input17.q.out +++ b/ql/src/test/results/clientpositive/llap/input17.q.out @@ -86,31 +86,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 11 Data size: 29524 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3548 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3084 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3548 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 3084 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3564 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3564 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3084 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 3084 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3084 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input18.q.out b/ql/src/test/results/clientpositive/llap/input18.q.out index 4623d8b3ac..a26786ae3b 100644 --- a/ql/src/test/results/clientpositive/llap/input18.q.out +++ b/ql/src/test/results/clientpositive/llap/input18.q.out @@ -89,31 +89,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 31208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input1_limit.q.out b/ql/src/test/results/clientpositive/llap/input1_limit.q.out index 0e9d0dec21..95d1317c1b 100644 --- a/ql/src/test/results/clientpositive/llap/input1_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/input1_limit.q.out @@ -115,31 +115,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -167,31 +171,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input20.q.out b/ql/src/test/results/clientpositive/llap/input20.q.out index 819d888426..656b9bb16d 100644 --- a/ql/src/test/results/clientpositive/llap/input20.q.out +++ b/ql/src/test/results/clientpositive/llap/input20.q.out @@ -101,31 +101,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input30.q.out b/ql/src/test/results/clientpositive/llap/input30.q.out index 769df05dac..c7e993bed7 100644 --- a/ql/src/test/results/clientpositive/llap/input30.q.out +++ b/ql/src/test/results/clientpositive/llap/input30.q.out @@ -86,17 +86,21 @@ STAGE PLANS: outputColumnNames: a Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -210,17 +214,17 @@ STAGE PLANS: outputColumnNames: a Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/input32.q.out b/ql/src/test/results/clientpositive/llap/input32.q.out index 690279d6f3..c662f3f559 100644 --- a/ql/src/test/results/clientpositive/llap/input32.q.out +++ b/ql/src/test/results/clientpositive/llap/input32.q.out @@ -85,17 +85,21 @@ STAGE PLANS: outputColumnNames: a Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input33.q.out b/ql/src/test/results/clientpositive/llap/input33.q.out index daf57dea3a..11dd147650 100644 --- a/ql/src/test/results/clientpositive/llap/input33.q.out +++ b/ql/src/test/results/clientpositive/llap/input33.q.out @@ -101,31 +101,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input34.q.out b/ql/src/test/results/clientpositive/llap/input34.q.out index 3f702cdf3b..b0dcbafa07 100644 --- a/ql/src/test/results/clientpositive/llap/input34.q.out +++ b/ql/src/test/results/clientpositive/llap/input34.q.out @@ -75,31 +75,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input35.q.out b/ql/src/test/results/clientpositive/llap/input35.q.out index 85e5412b47..53788842c8 100644 --- a/ql/src/test/results/clientpositive/llap/input35.q.out +++ b/ql/src/test/results/clientpositive/llap/input35.q.out @@ -75,31 +75,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input36.q.out b/ql/src/test/results/clientpositive/llap/input36.q.out index 8a8e30c1ea..f748418fb3 100644 --- a/ql/src/test/results/clientpositive/llap/input36.q.out +++ b/ql/src/test/results/clientpositive/llap/input36.q.out @@ -75,31 +75,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input38.q.out b/ql/src/test/results/clientpositive/llap/input38.q.out index 161cda2ac7..6e0fad5f69 100644 --- a/ql/src/test/results/clientpositive/llap/input38.q.out +++ b/ql/src/test/results/clientpositive/llap/input38.q.out @@ -69,31 +69,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input3_limit.q.out b/ql/src/test/results/clientpositive/llap/input3_limit.q.out index c9616de7a2..80a416c1dd 100644 --- a/ql/src/test/results/clientpositive/llap/input3_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/input3_limit.q.out @@ -115,31 +115,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input5.q.out b/ql/src/test/results/clientpositive/llap/input5.q.out index ab091fe0df..9cbf16cfa2 100644 --- a/ql/src/test/results/clientpositive/llap/input5.q.out +++ b/ql/src/test/results/clientpositive/llap/input5.q.out @@ -86,31 +86,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 11 Data size: 29480 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input6.q.out b/ql/src/test/results/clientpositive/llap/input6.q.out index 6565abd3b9..3b521b6718 100644 --- a/ql/src/test/results/clientpositive/llap/input6.q.out +++ b/ql/src/test/results/clientpositive/llap/input6.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input7.q.out b/ql/src/test/results/clientpositive/llap/input7.q.out index 88afecd8c3..9bf6ff1d54 100644 --- a/ql/src/test/results/clientpositive/llap/input7.q.out +++ b/ql/src/test/results/clientpositive/llap/input7.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: c1, c2 Statistics: Num rows: 25 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input8.q.out b/ql/src/test/results/clientpositive/llap/input8.q.out index 6e8c5a1135..6579d62e62 100644 --- a/ql/src/test/results/clientpositive/llap/input8.q.out +++ b/ql/src/test/results/clientpositive/llap/input8.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3 Statistics: Num rows: 25 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input9.q.out b/ql/src/test/results/clientpositive/llap/input9.q.out index dda00b30e1..e5024bff94 100644 --- a/ql/src/test/results/clientpositive/llap/input9.q.out +++ b/ql/src/test/results/clientpositive/llap/input9.q.out @@ -61,33 +61,37 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), min(key), max(key), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_part1.q.out b/ql/src/test/results/clientpositive/llap/input_part1.q.out index acd0ac6af2..3645dc515a 100644 --- a/ql/src/test/results/clientpositive/llap/input_part1.q.out +++ b/ql/src/test/results/clientpositive/llap/input_part1.q.out @@ -83,19 +83,19 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(hr), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(ds), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -146,34 +146,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_part10.q.out b/ql/src/test/results/clientpositive/llap/input_part10.q.out index dac65c50a4..721e732730 100644 --- a/ql/src/test/results/clientpositive/llap/input_part10.q.out +++ b/ql/src/test/results/clientpositive/llap/input_part10.q.out @@ -92,35 +92,35 @@ STAGE PLANS: outputColumnNames: a, b, ds, ts Statistics: Num rows: 1 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector(a, 'hll'), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector(b, 'hll') keys: ds (type: string), ts (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 662 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 662 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 526 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 722 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 722 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/input_part2.q.out b/ql/src/test/results/clientpositive/llap/input_part2.q.out index cb411ca417..b2cdf7bff3 100644 --- a/ql/src/test/results/clientpositive/llap/input_part2.q.out +++ b/ql/src/test/results/clientpositive/llap/input_part2.q.out @@ -96,19 +96,19 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 333 Data size: 91575 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(hr), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(ds), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Filter Operator isSamplingPred: false @@ -149,19 +149,19 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 333 Data size: 91575 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(hr), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(ds), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -250,67 +250,75 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_part5.q.out b/ql/src/test/results/clientpositive/llap/input_part5.q.out index cf429ac3b8..0c703e2093 100644 --- a/ql/src/test/results/clientpositive/llap/input_part5.q.out +++ b/ql/src/test/results/clientpositive/llap/input_part5.q.out @@ -62,33 +62,37 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 333 Data size: 151848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(hr), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(ds), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_testsequencefile.q.out b/ql/src/test/results/clientpositive/llap/input_testsequencefile.q.out index e882292886..6f42e003e9 100644 --- a/ql/src/test/results/clientpositive/llap/input_testsequencefile.q.out +++ b/ql/src/test/results/clientpositive/llap/input_testsequencefile.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_testxpath.q.out b/ql/src/test/results/clientpositive/llap/input_testxpath.q.out index f5f76d9a6d..bab05cd212 100644 --- a/ql/src/test/results/clientpositive/llap/input_testxpath.q.out +++ b/ql/src/test/results/clientpositive/llap/input_testxpath.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: key, value, mapvalue Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(mapvalue, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(mapvalue)), avg(COALESCE(length(mapvalue),0)), count(mapvalue), compute_bit_vector(mapvalue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4904 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 4232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 4904 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 4232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4920 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4920 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 4232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 4232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4232 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/input_testxpath2.q.out b/ql/src/test/results/clientpositive/llap/input_testxpath2.q.out index 017206a37a..aaf024d166 100644 --- a/ql/src/test/results/clientpositive/llap/input_testxpath2.q.out +++ b/ql/src/test/results/clientpositive/llap/input_testxpath2.q.out @@ -58,33 +58,37 @@ STAGE PLANS: outputColumnNames: lint_size, lintstring_size, mstringstring_size Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(lint_size, 'hll'), compute_stats(lintstring_size, 'hll'), compute_stats(mstringstring_size, 'hll') + aggregations: min(lint_size), max(lint_size), count(1), count(lint_size), compute_bit_vector(lint_size, 'hll'), min(lintstring_size), max(lintstring_size), count(lintstring_size), compute_bit_vector(lintstring_size, 'hll'), min(mstringstring_size), max(mstringstring_size), count(mstringstring_size), compute_bit_vector(mstringstring_size, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4872 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 4088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 4872 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 4088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4920 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4920 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 4088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 4088 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4088 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert1.q.out b/ql/src/test/results/clientpositive/llap/insert1.q.out index 13075a0574..60b6802901 100644 --- a/ql/src/test/results/clientpositive/llap/insert1.q.out +++ b/ql/src/test/results/clientpositive/llap/insert1.q.out @@ -72,33 +72,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -169,33 +173,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -280,33 +288,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -377,33 +389,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -484,16 +500,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE @@ -514,48 +530,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert1_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/llap/insert1_overwrite_partitions.q.out index 09f292f936..eb45ca050a 100644 --- a/ql/src/test/results/clientpositive/llap/insert1_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/llap/insert1_overwrite_partitions.q.out @@ -106,11 +106,11 @@ STAGE PLANS: outputColumnNames: one, two, ds, hr Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(1), count(one), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(two), compute_bit_vector(two, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -118,19 +118,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -311,31 +311,35 @@ STAGE PLANS: outputColumnNames: one, two Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(1), count(one), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(two), compute_bit_vector(two, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 840 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 840 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1248 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 840 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 840 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 840 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -485,11 +489,11 @@ STAGE PLANS: outputColumnNames: one, two, ds, hr Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(1), count(one), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(two), compute_bit_vector(two, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -497,19 +501,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/insert2_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/llap/insert2_overwrite_partitions.q.out index f94d6f6efb..92f8cc32f0 100644 --- a/ql/src/test/results/clientpositive/llap/insert2_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/llap/insert2_overwrite_partitions.q.out @@ -117,11 +117,11 @@ STAGE PLANS: outputColumnNames: one, two, ds Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(1), count(one), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(two), compute_bit_vector(two, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -129,19 +129,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -277,11 +277,11 @@ STAGE PLANS: outputColumnNames: one, two, ds Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(1), count(one), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(two), compute_bit_vector(two, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -289,19 +289,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/insert_into1.q.out b/ql/src/test/results/clientpositive/llap/insert_into1.q.out index 960bee71a0..50af005340 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into1.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into1.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -257,31 +261,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -430,31 +438,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -574,33 +586,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -678,33 +694,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert_into2.q.out b/ql/src/test/results/clientpositive/llap/insert_into2.q.out index ba1a56b4b8..29cd35295a 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into2.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into2.q.out @@ -88,35 +88,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -314,35 +314,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -505,35 +505,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 50 Data size: 9000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.98 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/insert_into3.q.out b/ql/src/test/results/clientpositive/llap/insert_into3.q.out index 9c9820e661..9e275d27c6 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into3.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into3.q.out @@ -119,31 +119,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 50 Data size: 4750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -171,31 +175,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -369,31 +377,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -421,31 +433,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert_into4.q.out b/ql/src/test/results/clientpositive/llap/insert_into4.q.out index f2e4bab82c..a19b952485 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into4.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into4.q.out @@ -92,31 +92,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -232,31 +236,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -346,33 +354,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert_into5.q.out b/ql/src/test/results/clientpositive/llap/insert_into5.q.out index 2573fd2591..8cffc06f7c 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into5.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into5.q.out @@ -88,31 +88,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -202,33 +206,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -320,37 +328,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 20 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -452,37 +460,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 20 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/insert_into6.q.out b/ql/src/test/results/clientpositive/llap/insert_into6.q.out index fabcf410ca..0fc0523315 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into6.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into6.q.out @@ -94,35 +94,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 150 Data size: 27000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -237,37 +237,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2096 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 2096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 2 Data size: 1168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1428 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1428 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out b/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out index bf8270de8c..369f3861e1 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out @@ -82,31 +82,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -225,31 +229,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -368,31 +376,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -512,31 +524,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -655,31 +671,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -816,31 +836,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -969,31 +993,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1112,31 +1140,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1255,31 +1287,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1399,31 +1435,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1542,31 +1582,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1685,31 +1729,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1828,31 +1876,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2214,37 +2266,37 @@ STAGE PLANS: outputColumnNames: i, j, ds Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(j), max(j), count(j), compute_bit_vector(j, 'hll') keys: ds (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2352,37 +2404,37 @@ STAGE PLANS: outputColumnNames: i, j, ds Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(j), max(j), count(j), compute_bit_vector(j, 'hll') keys: ds (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2473,37 +2525,37 @@ STAGE PLANS: outputColumnNames: i, j, ds Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(i, 'hll'), compute_stats(j, 'hll') + aggregations: min(i), max(i), count(1), count(i), compute_bit_vector(i, 'hll'), min(j), max(j), count(j), compute_bit_vector(j, 'hll') keys: ds (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 933 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 613 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2736,31 +2788,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -3030,31 +3086,35 @@ STAGE PLANS: outputColumnNames: key, a1, value Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector(a1, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3330,31 +3390,35 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out index 40fc7f9cec..039aa4b9a1 100644 --- a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out @@ -100,35 +100,35 @@ STAGE PLANS: outputColumnNames: name, age, gpa, year Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(name, 'hll'), compute_stats(age, 'hll'), compute_stats(gpa, 'hll') + aggregations: max(length(name)), avg(COALESCE(length(name),0)), count(1), count(name), compute_bit_vector(name, 'hll'), min(age), max(age), count(age), compute_bit_vector(age, 'hll'), min(gpa), max(gpa), count(gpa), compute_bit_vector(gpa, 'hll') keys: year (type: int) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: decimal(3,2)), _col11 (type: decimal(3,2)), _col12 (type: bigint), _col13 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DECIMAL' (type: string), _col10 (type: decimal(3,2)), _col11 (type: decimal(3,2)), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 1009 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1009 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/llap/insertoverwrite_bucket.q.out index 54a7ac0d49..fcc2874b30 100644 --- a/ql/src/test/results/clientpositive/llap/insertoverwrite_bucket.q.out +++ b/ql/src/test/results/clientpositive/llap/insertoverwrite_bucket.q.out @@ -158,31 +158,35 @@ STAGE PLANS: outputColumnNames: change, num Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(change, 'hll'), compute_stats(num, 'hll') + aggregations: max(length(change)), avg(COALESCE(length(change),0)), count(1), count(change), compute_bit_vector(change, 'hll'), max(length(num)), avg(COALESCE(length(num),0)), count(num), compute_bit_vector(num, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -292,31 +296,35 @@ STAGE PLANS: outputColumnNames: create_ts, change, num Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(create_ts, 'hll'), compute_stats(change, 'hll'), compute_stats(num, 'hll') + aggregations: max(length(create_ts)), avg(COALESCE(length(create_ts),0)), count(1), count(create_ts), compute_bit_vector(create_ts, 'hll'), max(length(change)), avg(COALESCE(length(change),0)), count(change), compute_bit_vector(change, 'hll'), max(length(num)), avg(COALESCE(length(num),0)), count(num), compute_bit_vector(num, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join1.q.out b/ql/src/test/results/clientpositive/llap/join1.q.out index 5d26bbe9c1..f391af0bb3 100644 --- a/ql/src/test/results/clientpositive/llap/join1.q.out +++ b/ql/src/test/results/clientpositive/llap/join1.q.out @@ -104,31 +104,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join14.q.out b/ql/src/test/results/clientpositive/llap/join14.q.out index dfe667f472..c3768f46ad 100644 --- a/ql/src/test/results/clientpositive/llap/join14.q.out +++ b/ql/src/test/results/clientpositive/llap/join14.q.out @@ -110,31 +110,35 @@ STAGE PLANS: outputColumnNames: c1, c2 Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join17.q.out b/ql/src/test/results/clientpositive/llap/join17.q.out index e432fb8019..1fa7f58267 100644 --- a/ql/src/test/results/clientpositive/llap/join17.q.out +++ b/ql/src/test/results/clientpositive/llap/join17.q.out @@ -218,53 +218,57 @@ STAGE PLANS: outputColumnNames: key1, value1, key2, value2 Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + aggregations: min(key1), max(key1), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), min(key2), max(key2), count(key2), compute_bit_vector(key2, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join25.q.out b/ql/src/test/results/clientpositive/llap/join25.q.out index 8d72e5a999..942eb02ba1 100644 --- a/ql/src/test/results/clientpositive/llap/join25.q.out +++ b/ql/src/test/results/clientpositive/llap/join25.q.out @@ -99,33 +99,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 39 Data size: 7176 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.974359 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join26.q.out b/ql/src/test/results/clientpositive/llap/join26.q.out index 71a0af2324..e8fc00d823 100644 --- a/ql/src/test/results/clientpositive/llap/join26.q.out +++ b/ql/src/test/results/clientpositive/llap/join26.q.out @@ -257,19 +257,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 61 Data size: 16348 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9836066 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -320,34 +320,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join27.q.out b/ql/src/test/results/clientpositive/llap/join27.q.out index 4328cd6bd6..c9308a7f36 100644 --- a/ql/src/test/results/clientpositive/llap/join27.q.out +++ b/ql/src/test/results/clientpositive/llap/join27.q.out @@ -99,33 +99,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 40 Data size: 7360 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.975 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join28.q.out b/ql/src/test/results/clientpositive/llap/join28.q.out index 485158dc21..22995ec5f9 100644 --- a/ql/src/test/results/clientpositive/llap/join28.q.out +++ b/ql/src/test/results/clientpositive/llap/join28.q.out @@ -140,33 +140,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9836066 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join29.q.out b/ql/src/test/results/clientpositive/llap/join29.q.out index b2df356f2e..4828436514 100644 --- a/ql/src/test/results/clientpositive/llap/join29.q.out +++ b/ql/src/test/results/clientpositive/llap/join29.q.out @@ -142,31 +142,35 @@ STAGE PLANS: outputColumnNames: key, cnt1, cnt2 Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt1, 'hll'), compute_stats(cnt2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt1), max(cnt1), count(cnt1), compute_bit_vector(cnt1, 'hll'), min(cnt2), max(cnt2), count(cnt2), compute_bit_vector(cnt2, 'hll') minReductionHashAggr: 0.9166667 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join3.q.out b/ql/src/test/results/clientpositive/llap/join3.q.out index 237c0a3690..8de422737b 100644 --- a/ql/src/test/results/clientpositive/llap/join3.q.out +++ b/ql/src/test/results/clientpositive/llap/join3.q.out @@ -143,31 +143,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join30.q.out b/ql/src/test/results/clientpositive/llap/join30.q.out index 0a79ce5848..c11b33c16e 100644 --- a/ql/src/test/results/clientpositive/llap/join30.q.out +++ b/ql/src/test/results/clientpositive/llap/join30.q.out @@ -122,31 +122,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.9375 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join31.q.out b/ql/src/test/results/clientpositive/llap/join31.q.out index 20914a471d..e041c62228 100644 --- a/ql/src/test/results/clientpositive/llap/join31.q.out +++ b/ql/src/test/results/clientpositive/llap/join31.q.out @@ -150,31 +150,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join32.q.out b/ql/src/test/results/clientpositive/llap/join32.q.out index 20db9a3bbd..9f3e7de8d9 100644 --- a/ql/src/test/results/clientpositive/llap/join32.q.out +++ b/ql/src/test/results/clientpositive/llap/join32.q.out @@ -258,19 +258,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -321,34 +321,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index 106050f45f..4b2f07a06e 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -266,19 +266,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -329,34 +329,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -839,19 +843,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.989899 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -897,34 +901,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1326,19 +1334,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1389,34 +1397,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1807,19 +1819,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1894,34 +1906,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2217,33 +2233,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2514,33 +2534,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join33.q.out b/ql/src/test/results/clientpositive/llap/join33.q.out index bfbb4ba310..a45a6cf58d 100644 --- a/ql/src/test/results/clientpositive/llap/join33.q.out +++ b/ql/src/test/results/clientpositive/llap/join33.q.out @@ -258,19 +258,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.984127 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -321,34 +321,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join34.q.out b/ql/src/test/results/clientpositive/llap/join34.q.out index a58ee4af3d..cb87dc7f3d 100644 --- a/ql/src/test/results/clientpositive/llap/join34.q.out +++ b/ql/src/test/results/clientpositive/llap/join34.q.out @@ -121,19 +121,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9782609 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -236,19 +236,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9782609 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -370,34 +370,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/join35.q.out b/ql/src/test/results/clientpositive/llap/join35.q.out index cd435a606c..a71f1c8b0c 100644 --- a/ql/src/test/results/clientpositive/llap/join35.q.out +++ b/ql/src/test/results/clientpositive/llap/join35.q.out @@ -330,53 +330,57 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), min(val2), max(val2), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9782609 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 6 Execution mode: llap Needs Tagging: false @@ -434,19 +438,19 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), min(val2), max(val2), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.9782609 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/join36.q.out b/ql/src/test/results/clientpositive/llap/join36.q.out index 8071b04748..95067e500b 100644 --- a/ql/src/test/results/clientpositive/llap/join36.q.out +++ b/ql/src/test/results/clientpositive/llap/join36.q.out @@ -117,16 +117,16 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 315 Data size: 3780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll'), min(val2), max(val2), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -155,17 +155,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join37.q.out b/ql/src/test/results/clientpositive/llap/join37.q.out index 3c09084c9d..3e4f4cf69d 100644 --- a/ql/src/test/results/clientpositive/llap/join37.q.out +++ b/ql/src/test/results/clientpositive/llap/join37.q.out @@ -99,33 +99,37 @@ STAGE PLANS: outputColumnNames: key, value, val2 Statistics: Num rows: 39 Data size: 7176 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.974359 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join39.q.out b/ql/src/test/results/clientpositive/llap/join39.q.out index a2bc64d1b8..237797ed70 100644 --- a/ql/src/test/results/clientpositive/llap/join39.q.out +++ b/ql/src/test/results/clientpositive/llap/join39.q.out @@ -67,16 +67,16 @@ STAGE PLANS: outputColumnNames: key, value, key1, val2 Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(key1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(key1)), avg(COALESCE(length(key1),0)), count(key1), compute_bit_vector(key1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap LLAP IO: no inputs Map 3 @@ -105,17 +105,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join4.q.out b/ql/src/test/results/clientpositive/llap/join4.q.out index 7eae3ae6d7..077ea95037 100644 --- a/ql/src/test/results/clientpositive/llap/join4.q.out +++ b/ql/src/test/results/clientpositive/llap/join4.q.out @@ -127,31 +127,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join5.q.out b/ql/src/test/results/clientpositive/llap/join5.q.out index dce3d2eb06..64a9b10fc6 100644 --- a/ql/src/test/results/clientpositive/llap/join5.q.out +++ b/ql/src/test/results/clientpositive/llap/join5.q.out @@ -127,31 +127,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join6.q.out b/ql/src/test/results/clientpositive/llap/join6.q.out index e20d166590..2d5151ab69 100644 --- a/ql/src/test/results/clientpositive/llap/join6.q.out +++ b/ql/src/test/results/clientpositive/llap/join6.q.out @@ -127,31 +127,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join7.q.out b/ql/src/test/results/clientpositive/llap/join7.q.out index 2f4c862200..90756133f3 100644 --- a/ql/src/test/results/clientpositive/llap/join7.q.out +++ b/ql/src/test/results/clientpositive/llap/join7.q.out @@ -170,31 +170,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6 Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll'), min(c5), max(c5), count(c5), compute_bit_vector(c5, 'hll'), max(length(c6)), avg(COALESCE(length(c6),0)), count(c6), compute_bit_vector(c6, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: struct), _col23 (type: bigint), _col24 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), max(VALUE._col21), avg(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col21,0)) (type: bigint), COALESCE(_col22,0) (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join8.q.out b/ql/src/test/results/clientpositive/llap/join8.q.out index a7f500baa3..74ce7592c4 100644 --- a/ql/src/test/results/clientpositive/llap/join8.q.out +++ b/ql/src/test/results/clientpositive/llap/join8.q.out @@ -130,31 +130,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/join9.q.out b/ql/src/test/results/clientpositive/llap/join9.q.out index 2ce0126eb0..156566fd94 100644 --- a/ql/src/test/results/clientpositive/llap/join9.q.out +++ b/ql/src/test/results/clientpositive/llap/join9.q.out @@ -226,53 +226,57 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown_negative.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown_negative.q.out index 49b7a6b2df..a9cf7c31c9 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown_negative.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown_negative.q.out @@ -306,16 +306,16 @@ STAGE PLANS: outputColumnNames: key, c1 Statistics: Num rows: 307 Data size: 29165 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -335,17 +335,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -373,31 +377,35 @@ STAGE PLANS: outputColumnNames: key, c1 Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(c1), max(c1), count(c1), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/lineage1.q.out b/ql/src/test/results/clientpositive/llap/lineage1.q.out index cff56bb936..f885e3be51 100644 --- a/ql/src/test/results/clientpositive/llap/lineage1.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage1.q.out @@ -164,31 +164,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 128 Data size: 5426 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -221,16 +225,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 128 Data size: 5426 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/lineage2.q.out b/ql/src/test/results/clientpositive/llap/lineage2.q.out index 6576a8468c..85dc772009 100644 --- a/ql/src/test/results/clientpositive/llap/lineage2.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage2.q.out @@ -5,7 +5,8 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src1 PREHOOK: Output: database:default PREHOOK: Output: default@src2 -{"version":"1.0","engine":"tez","database":"default","hash":"87921246fb098d44c05e0ccd9ecb0676","queryText":"create table src2 as select key key2, value value2 from src1","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.src2.value2"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"87921246fb098d44c05e0ccd9ecb0676","queryText":"create table src2 as select key key2, value value2 from src1","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.src2.value2"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} PREHOOK: query: select * from src1 where key is not null and value is not null limit 3 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -31,12 +32,14 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src1 PREHOOK: Output: database:default PREHOOK: Output: default@dest1_n56 -{"version":"1.0","engine":"tez","database":"default","hash":"01251b1a2a539f7bb1d533cf6a9de47d","queryText":"create table dest1_n56 as select * from src1","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1_n56.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1_n56.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"01251b1a2a539f7bb1d533cf6a9de47d","queryText":"create table dest1_n56 as select * from src1","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1_n56.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1_n56.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} PREHOOK: query: insert into table dest1_n56 select * from src2 PREHOOK: type: QUERY PREHOOK: Input: default@src2 PREHOOK: Output: default@dest1_n56 -{"version":"1.0","engine":"tez","database":"default","hash":"d3d379a20e27c1618037bd6b8e840b13","queryText":"insert into table dest1_n56 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1_n56.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1_n56.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"d3d379a20e27c1618037bd6b8e840b13","queryText":"insert into table dest1_n56 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1_n56.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1_n56.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select key k, dest1_n56.value from dest1_n56 PREHOOK: type: QUERY PREHOOK: Input: default@dest1_n56 @@ -461,26 +464,30 @@ PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: database:default PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"7e2a275cdee3a519d901b7b178eefcd7","queryText":"create table dest2_n11 as select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"7e2a275cdee3a519d901b7b178eefcd7","queryText":"create table dest2_n11 as select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert overwrite table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"b275c2987a11e52fcecb46cfee2fb17e","queryText":"insert overwrite table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"b275c2987a11e52fcecb46cfee2fb17e","queryText":"insert overwrite table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"cabe07848c79ab95f0937586e75ad64e","queryText":"insert into table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"cabe07848c79ab95f0937586e75ad64e","queryText":"insert into table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2_n11 select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"4c13fe982c4d22e5735ba469dee4b3d8","queryText":"insert into table dest2_n11\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"length(src1.value) is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"length(src2.value2) is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"4c13fe982c4d22e5735ba469dee4b3d8","queryText":"insert into table dest2_n11\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"length(src1.value) is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"length(src2.value2) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select * from src1 where length(key) > 2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -523,14 +530,16 @@ PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: database:default PREHOOK: Output: default@dest3_n0 -{"version":"1.0","engine":"tez","database":"default","hash":"04c85db3424d79a3663c0532bc1e0a35","queryText":"create table dest3_n0 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 1) and src2.key2 is not null)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3_n0.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3_n0.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3_n0.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3_n0.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"04c85db3424d79a3663c0532bc1e0a35","queryText":"create table dest3_n0 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 1) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3_n0.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3_n0.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3_n0.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3_n0.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert overwrite table dest2_n11 select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2_n11 -{"version":"1.0","engine":"tez","database":"default","hash":"fb315308480b6e64466a6db5246895d6","queryText":"insert overwrite table dest2_n11\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"fb315308480b6e64466a6db5246895d6","queryText":"insert overwrite table dest2_n11\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2_n11.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2_n11.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1_n0 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1_n0(key INT, value STRING) STORED AS TEXTFILE @@ -552,7 +561,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_l1_n0 -{"version":"1.0","engine":"tez","database":"default","hash":"40b5d904f13549d8c25bd0be758f5b6f","queryText":"INSERT OVERWRITE TABLE dest_l1_n0\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"p1.key is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"p2.key is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(j.key), 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(j.value, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n0.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n0.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"40b5d904f13549d8c25bd0be758f5b6f","queryText":"INSERT OVERWRITE TABLE dest_l1_n0\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"p1.key is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"p2.key is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n0.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n0.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept_n10 @@ -593,7 +603,8 @@ PREHOOK: Input: default@dept_n10 PREHOOK: Input: default@emp PREHOOK: Input: default@project_n10 PREHOOK: Output: default@tgt_n10 -{"version":"1.0","engine":"tez","database":"default","hash":"bd297ef302d63c60b0bfb692af732b04","queryText":"INSERT INTO TABLE tgt_n10\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept_n10 d ON d.dept_id = em.dept_id\n ) emd JOIN project_n10 p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept_n10.dept_name, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 'hll')","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project_n10.project_id, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project_n10.project_name, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt_n10.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt_n10.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt_n10.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt_n10.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project_n10.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project_n10.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_id"}]} +Result schema has 6 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"bd297ef302d63c60b0bfb692af732b04","queryText":"INSERT INTO TABLE tgt_n10\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept_n10 d ON d.dept_id = em.dept_id\n ) emd JOIN project_n10 p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt_n10.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt_n10.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt_n10.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt_n10.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project_n10.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project_n10.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -604,7 +615,8 @@ PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dest_l2 -{"version":"1.0","engine":"tez","database":"default","hash":"f9a01e400eb50cc3c5ec0741ed20994c","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0,1],"expression":"col1","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToByte(col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[0,1],"expression":"compute_stats(col1, 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(UDFToByte(col1), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToLong(col1), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"f9a01e400eb50cc3c5ec0741ed20994c","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0,1],"expression":"col1","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToByte(col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(col1)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} PREHOOK: query: select * from ( select c1 + c2 x from dest_l2 union all @@ -625,7 +637,8 @@ PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@dest_l3 -{"version":"1.0","engine":"tez","database":"default","hash":"9f432e7641bec615db3eb365daa3eeae","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0,1,2,3],"expression":"col1","edgeType":"PROJECTION"},{"sources":[],"targets":[0,1,2,3],"expression":"compute_stats(col1, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"9f432e7641bec615db3eb365daa3eeae","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0,1,2,3],"expression":"col1","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id) from dest_l2 a where a.c2 != 10 @@ -659,7 +672,8 @@ PREHOOK: Input: default@dest_l2 PREHOOK: Input: default@dest_l3 PREHOOK: Output: database:default PREHOOK: Output: default@t_n10 -{"version":"1.0","engine":"tez","database":"default","hash":"1a18373814a0ccf82ee1409db6a912b5","queryText":"create table t_n10 as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"(a.id > 0)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1],"expression":"((b.id > 0) and (b.c3 = 15))","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.dest_l2.c2, 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.dest_l2.c3, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t_n10.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t_n10.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"1a18373814a0ccf82ee1409db6a912b5","queryText":"create table t_n10 as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"(a.id > 0)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1],"expression":"((b.id > 0) and (b.c3 = 15))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t_n10.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t_n10.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: SELECT substr(src1.key,1,1), count(DISTINCT substr(src1.value,5)), concat(substr(src1.key,1,1),sum(substr(src1.value,5))) from src1 @@ -704,4 +718,5 @@ from relations lateral view explode(ep1_ids) rel1 as ep1_id PREHOOK: type: QUERY PREHOOK: Input: default@relations PREHOOK: Output: default@rels_exploded -{"version":"1.0","engine":"tez","database":"default","hash":"56b2b197f394a30537ce1acf835ff8e1","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32))","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32))","edgeType":"PROJECTION"},{"sources":[8],"targets":[0],"expression":"compute_stats(default.relations.identity, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"expression":"compute_stats(default.relations.type, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"expression":"compute_stats(default.relations.ep1_src_type, 'hll')","edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"expression":"compute_stats(default.relations.ep1_type, 'hll')","edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"expression":"compute_stats(default.relations.ep2_src_type, 'hll')","edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"expression":"compute_stats(default.relations.ep2_type, 'hll')","edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"compute_stats(CAST( rel1._col11 AS CHAR(32)), 'hll')","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"compute_stats(CAST( rel2._col12 AS CHAR(32)), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} +Result schema has 8 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"56b2b197f394a30537ce1acf835ff8e1","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32))","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32))","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out index d762fbc049..c87d7c0c92 100644 --- a/ql/src/test/results/clientpositive/llap/lineage3.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -10,7 +10,8 @@ insert into table d1 select x + length(y) PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 -{"version":"1.0","engine":"tez","database":"default","hash":"a1d51634883428cbc72084be0ec2e641","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t_n20\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"UDFToLong(a.cint) is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[1,2],"targets":[0],"expression":"compute_stats((UDFToInteger(a.ctinyint) + length(b.cstring1)), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"a1d51634883428cbc72084be0ec2e641","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t_n20\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"UDFToLong(a.cint) is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: drop table if exists d2 PREHOOK: type: DROPTABLE PREHOOK: query: create table d2(b varchar(128)) @@ -25,7 +26,9 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 PREHOOK: Output: default@d2 -{"version":"1.0","engine":"tez","database":"default","hash":"84e3cdc38011da5842162df175b2a494","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t_n20\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"UDFToLong(a.cint) is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t_n20.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1,0],"expression":"(t_n20.x > 0Y)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(x), 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[0],"expression":"compute_stats(CAST( y AS varchar(128)), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +Result schema has 1 fields, but we don't get as many dependencies +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"84e3cdc38011da5842162df175b2a494","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t_n20\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"UDFToLong(a.cint) is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t_n20.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t_n20.x > 0Y)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: drop table if exists t_n20 PREHOOK: type: DROPTABLE PREHOOK: query: create table t_n20 as @@ -36,7 +39,8 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src1 PREHOOK: Output: database:default PREHOOK: Output: default@t_n20 -{"version":"1.0","engine":"tez","database":"default","hash":"5a2daa3d8508025880412b524351c849","queryText":"create table t_n20 as\nselect * from\n (select * from\n (select key from src1 limit 1) v1) v2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[1],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t_n20.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.src1.key"}]} +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"5a2daa3d8508025880412b524351c849","queryText":"create table t_n20 as\nselect * from\n (select * from\n (select key from src1 limit 1) v1) v2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t_n20.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.src1.key"}]} PREHOOK: query: drop table if exists dest_l1_n2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l1_n2(a int, b varchar(128)) @@ -51,7 +55,8 @@ where cint is not null and cint < 0 order by cint, cs limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1_n2@ds=today -{"version":"1.0","engine":"tez","database":"default","hash":"b56115e94fe07fda7b4d2ffecf57adc6","queryText":"insert into table dest_l1_n2 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1,2],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(default.alltypesorc.cint, 'hll')","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( alltypesorc.cstring1 AS varchar(128)), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'today'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"b56115e94fe07fda7b4d2ffecf57adc6","queryText":"insert into table dest_l1_n2 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: insert into table dest_l1_n2 partition (ds='tomorrow') select min(cint), cast(min(cstring1) as varchar(128)) as cs from alltypesorc @@ -61,7 +66,8 @@ having min(cbigint) > 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1_n2@ds=tomorrow -{"version":"1.0","engine":"tez","database":"default","hash":"53b7b48554f009345159739b3ab04fa1","queryText":"insert into table dest_l1_n2 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[3],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[5,3],"targets":[0,1,2],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2],"expression":"(min(default.alltypesorc.cbigint) > 10L)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(min(default.alltypesorc.cint), 'hll')","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( min(default.alltypesorc.cstring1) AS varchar(128)), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'tomorrow'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"53b7b48554f009345159739b3ab04fa1","queryText":"insert into table dest_l1_n2 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[2],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"(min(default.alltypesorc.cbigint) > 10L)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1_n2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: select cint, rank() over(order by cint) from alltypesorc where cint > 10 and cint < 10000 limit 10 PREHOOK: type: QUERY @@ -348,12 +354,14 @@ PREHOOK: query: insert into dest_dp1 partition (year) select first, word, year f PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp1 -{"version":"1.0","engine":"tez","database":"default","hash":"8d922f2fb420d3dffd87766f09123ccc","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[6],"targets":[4],"expression":"compute_stats(default.src_dp.first, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[5],"expression":"compute_stats(default.src_dp.word, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"first"},{"id":5,"vertexType":"COLUMN","vertexId":"word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} +Result schema has 3 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"8d922f2fb420d3dffd87766f09123ccc","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp2 -{"version":"1.0","engine":"tez","database":"default","hash":"8fae561192d76da429955aebc0fd87f9","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[11],"targets":[4,5],"edgeType":"PROJECTION"},{"sources":[8],"targets":[6],"expression":"compute_stats(default.src_dp.first, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[7],"expression":"compute_stats(default.src_dp.word, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":5,"vertexType":"COLUMN","vertexId":"month"},{"id":6,"vertexType":"COLUMN","vertexId":"first"},{"id":7,"vertexType":"COLUMN","vertexId":"word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":9,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":10,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} +Result schema has 4 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"8fae561192d76da429955aebc0fd87f9","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} PREHOOK: query: insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0 PREHOOK: type: QUERY PREHOOK: Input: default@src_dp @@ -387,7 +395,9 @@ PREHOOK: Output: default@dest_dp1@year=0 PREHOOK: Output: default@dest_dp2 PREHOOK: Output: default@dest_dp2@y=1 PREHOOK: Output: default@dest_dp3@y=2 -Failed to log lineage graph, query is not affected -java.lang.IndexOutOfBoundsException: Index: 2, Size: 2 -#### A masked pattern was here #### - +Result schema has 2 fields, but we don't get as many dependencies +Result schema has 2 fields, but we don't get as many dependencies +Result schema has 2 fields, but we don't get as many dependencies +Result schema has 2 fields, but we don't get as many dependencies +Result schema has 2 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"e540a88155ffa4bf6842a4fdf3bfe639","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_1.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_1.q.out index fa0d8f0ed8..4b06982cc3 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_1.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_1.q.out @@ -87,12 +87,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -100,9 +100,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -191,30 +191,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out index a22819003b..3cb709710f 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out @@ -86,12 +86,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 500 Data size: 179000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -99,9 +99,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -147,30 +147,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_11.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_11.q.out index 4fe1310685..a14fe4b8cb 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_11.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_11.q.out @@ -86,12 +86,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 500 Data size: 179000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -99,9 +99,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -147,30 +147,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_12.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_12.q.out index 9b2ec89b48..b75aa85c0e 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_12.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_12.q.out @@ -86,12 +86,12 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4, col5, ds, hr Statistics: Num rows: 500 Data size: 306500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(col3), compute_bit_vector(col3, 'hll'), max(length(col4)), avg(COALESCE(length(col4),0)), count(col4), compute_bit_vector(col4, 'hll'), max(length(col5)), avg(COALESCE(length(col5),0)), count(col5), compute_bit_vector(col5, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Statistics: Num rows: 1 Data size: 1348 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -99,9 +99,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1348 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary), _col11 (type: int), _col12 (type: struct), _col13 (type: bigint), _col14 (type: binary), _col15 (type: int), _col16 (type: struct), _col17 (type: bigint), _col18 (type: binary), _col19 (type: int), _col20 (type: struct), _col21 (type: bigint), _col22 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -147,30 +147,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), max(VALUE._col17), avg(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col11,0)) (type: bigint), COALESCE(_col12,0) (type: double), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col15,0)) (type: bigint), COALESCE(_col16,0) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col19,0)) (type: bigint), COALESCE(_col20,0) (type: double), (_col4 - _col21) (type: bigint), COALESCE(ndv_compute_bit_vector(_col22),0) (type: bigint), _col22 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1510 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1510 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types struct:struct:struct:struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29,_col30,_col31 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_13.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_13.q.out index ac035e3203..d0cc71faf4 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_13.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_13.q.out @@ -86,12 +86,12 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4, col5, ds, hr Statistics: Num rows: 500 Data size: 315000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(col3), compute_bit_vector(col3, 'hll'), max(length(col4)), avg(COALESCE(length(col4),0)), count(col4), compute_bit_vector(col4, 'hll'), max(length(col5)), avg(COALESCE(length(col5),0)), count(col5), compute_bit_vector(col5, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Statistics: Num rows: 1 Data size: 1365 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -99,9 +99,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1365 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary), _col11 (type: int), _col12 (type: struct), _col13 (type: bigint), _col14 (type: binary), _col15 (type: int), _col16 (type: struct), _col17 (type: bigint), _col18 (type: binary), _col19 (type: int), _col20 (type: struct), _col21 (type: bigint), _col22 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -147,30 +147,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), max(VALUE._col17), avg(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Statistics: Num rows: 1 Data size: 1025 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col11,0)) (type: bigint), COALESCE(_col12,0) (type: double), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col15,0)) (type: bigint), COALESCE(_col16,0) (type: double), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col19,0)) (type: bigint), COALESCE(_col20,0) (type: double), (_col4 - _col21) (type: bigint), COALESCE(ndv_compute_bit_vector(_col22),0) (type: bigint), _col22 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 + Statistics: Num rows: 1 Data size: 1527 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 2397 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1527 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types struct:struct:struct:struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29,_col30,_col31 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_14.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_14.q.out index e79a6e7a58..7f376f7ac8 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_14.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_14.q.out @@ -77,19 +77,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -135,34 +135,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_2.q.out index dcae664bb5..b506156dba 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_2.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_2.q.out @@ -92,12 +92,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -105,9 +105,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -196,30 +196,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_3.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_3.q.out index a36d0b8e50..a2b0f00cfb 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_3.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_3.q.out @@ -82,12 +82,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -95,9 +95,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -186,30 +186,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_4.q.out index 68144f03f1..964f4ae688 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_4.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_4.q.out @@ -92,12 +92,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -105,9 +105,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -196,30 +196,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -413,12 +413,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -426,9 +426,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -517,30 +517,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_5.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_5.q.out index e5cb4f95e6..47d4a5e92f 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_5.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_5.q.out @@ -87,12 +87,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -100,9 +100,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -191,30 +191,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out index dd1e97bec2..3be3f9ca27 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out @@ -91,12 +91,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,9 +104,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -195,30 +195,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -456,12 +456,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,9 +469,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -560,30 +560,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out index 87cb08fe12..a8ad351a53 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out @@ -91,12 +91,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,9 +104,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -195,30 +195,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -456,12 +456,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,9 +469,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -560,30 +560,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_9.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_9.q.out index 1938bfbf4e..b49a276cd5 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_9.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_9.q.out @@ -92,12 +92,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -105,9 +105,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -196,30 +196,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -413,12 +413,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -426,9 +426,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -517,30 +517,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out b/ql/src/test/results/clientpositive/llap/llap_stats.q.out index f2b9cd3479..6fea21c576 100644 --- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out @@ -159,37 +159,37 @@ STAGE PLANS: outputColumnNames: ctinyint, csmallint, cint Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(ctinyint, 'hll'), compute_stats(csmallint, 'hll') + aggregations: min(ctinyint), max(ctinyint), count(1), count(ctinyint), compute_bit_vector(ctinyint, 'hll'), min(csmallint), max(csmallint), count(csmallint), compute_bit_vector(csmallint, 'hll') keys: cint (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 4260 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 5 Data size: 1660 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 4260 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 5 Data size: 1660 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: tinyint), _col2 (type: tinyint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: smallint), _col7 (type: smallint), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 4420 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 5 Data size: 1660 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 4420 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 5 Data size: 2660 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 4420 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2660 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out index c7219f624c..4eefbbd4fa 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out @@ -105,19 +105,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE @@ -138,37 +138,37 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '2008-12-31' (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-12-31' (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -177,18 +177,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '2008-12-31' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part10.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part10.q.out index f9ced8e400..bbfec4f169 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part10.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part10.q.out @@ -85,37 +85,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part13.q.out index 708720eb50..c4b683e12d 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part13.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part13.q.out @@ -100,19 +100,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 221 Data size: 79118 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -141,37 +141,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 221 Data size: 79118 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out index cf08318e71..f662100218 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out @@ -136,35 +136,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll') keys: value (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 514 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -192,19 +192,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll') keys: value (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -228,19 +228,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll') keys: value (type: string) minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out index 6bb88e25b9..9adfdac208 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out @@ -98,35 +98,35 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 2000 Data size: 912000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part4.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part4.q.out index 48ca6a59f3..d1bef1252f 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part4.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part4.q.out @@ -97,37 +97,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 4 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 4 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 4 Data size: 2816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 4 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out index 22bb823c54..8975270cd2 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out @@ -58,19 +58,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll') keys: value (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 250 Data size: 82750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 250 Data size: 82750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -84,18 +84,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 250 Data size: 65750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 89250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 89250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part8.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part8.q.out index f46a6bcce2..f887a68765 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part8.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part8.q.out @@ -119,12 +119,12 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -132,9 +132,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Filter Operator isSamplingPred: false @@ -178,12 +178,12 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '2008-12-31' (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: '2008-12-31' (type: string), _col1 (type: string) @@ -191,9 +191,9 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -358,30 +358,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -396,30 +396,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '2008-12-31' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part9.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part9.q.out index 451a0a2ddf..9208f6abb9 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part9.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part9.q.out @@ -85,37 +85,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 546000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/mapreduce1.q.out b/ql/src/test/results/clientpositive/llap/mapreduce1.q.out index 57654464da..3a3a1a6c17 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce1.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce1.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(ten), compute_bit_vector(ten, 'hll'), min(one), max(one), count(one), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce2.q.out b/ql/src/test/results/clientpositive/llap/mapreduce2.q.out index 3cf3584e51..bd3121fd5e 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce2.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce2.q.out @@ -81,31 +81,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(ten), compute_bit_vector(ten, 'hll'), min(one), max(one), count(one), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce3.q.out b/ql/src/test/results/clientpositive/llap/mapreduce3.q.out index f691d40eb6..ce38c612f7 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce3.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce3.q.out @@ -81,31 +81,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(ten), compute_bit_vector(ten, 'hll'), min(one), max(one), count(one), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce4.q.out b/ql/src/test/results/clientpositive/llap/mapreduce4.q.out index 12882a8684..0be056302e 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce4.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce4.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(ten), compute_bit_vector(ten, 'hll'), min(one), max(one), count(one), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce5.q.out b/ql/src/test/results/clientpositive/llap/mapreduce5.q.out index 4c014c3223..5e76869204 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce5.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce5.q.out @@ -77,31 +77,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(ten), compute_bit_vector(ten, 'hll'), min(one), max(one), count(one), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce6.q.out b/ql/src/test/results/clientpositive/llap/mapreduce6.q.out index cf3e69b2e0..38e384fc1f 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce6.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce6.q.out @@ -77,31 +77,35 @@ STAGE PLANS: outputColumnNames: key, ten, one, value Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(ten), compute_bit_vector(ten, 'hll'), min(one), max(one), count(one), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce7.q.out b/ql/src/test/results/clientpositive/llap/mapreduce7.q.out index 10d42cefee..764d58ef06 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce7.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce7.q.out @@ -81,31 +81,35 @@ STAGE PLANS: outputColumnNames: k, v, key, ten, one, value Statistics: Num rows: 500 Data size: 140500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k, 'hll'), compute_stats(v, 'hll'), compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(k)), avg(COALESCE(length(k),0)), count(1), count(k), compute_bit_vector(k, 'hll'), max(length(v)), avg(COALESCE(length(v),0)), count(v), compute_bit_vector(v, 'hll'), min(key), max(key), count(key), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(ten), compute_bit_vector(ten, 'hll'), min(one), max(one), count(one), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: struct), _col23 (type: bigint), _col24 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), max(VALUE._col21), avg(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col21,0)) (type: bigint), COALESCE(_col22,0) (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/mapreduce8.q.out b/ql/src/test/results/clientpositive/llap/mapreduce8.q.out index 1a38974d81..9439894d4b 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce8.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce8.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: k, v, key, ten, one, value Statistics: Num rows: 500 Data size: 140500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k, 'hll'), compute_stats(v, 'hll'), compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(k)), avg(COALESCE(length(k),0)), count(1), count(k), compute_bit_vector(k, 'hll'), max(length(v)), avg(COALESCE(length(v),0)), count(v), compute_bit_vector(v, 'hll'), min(key), max(key), count(key), compute_bit_vector(key, 'hll'), min(ten), max(ten), count(ten), compute_bit_vector(ten, 'hll'), min(one), max(one), count(one), compute_bit_vector(one, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: struct), _col23 (type: bigint), _col24 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), max(VALUE._col21), avg(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col21,0)) (type: bigint), COALESCE(_col22,0) (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/masking_mv.q.out b/ql/src/test/results/clientpositive/llap/masking_mv.q.out index 05a0f613a1..196688a17d 100644 --- a/ql/src/test/results/clientpositive/llap/masking_mv.q.out +++ b/ql/src/test/results/clientpositive/llap/masking_mv.q.out @@ -64,33 +64,37 @@ STAGE PLANS: outputColumnNames: col1 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -753,33 +757,37 @@ STAGE PLANS: outputColumnNames: col1 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out index 9af6567987..e34147d1da 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out @@ -85,31 +85,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -497,33 +501,37 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -842,31 +850,35 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3258 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1101,31 +1113,35 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 55 Data size: 9955 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1251,31 +1267,35 @@ STAGE PLANS: outputColumnNames: value, key, tes"t, te*#"s"t Statistics: Num rows: 55 Data size: 10835 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll'), compute_stats(tes"t, 'hll'), compute_stats(te*#"s"t, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll'), min(tes"t), max(tes"t), count(tes"t), compute_bit_vector(tes"t, 'hll'), min(te*#"s"t), max(te*#"s"t), count(te*#"s"t), compute_bit_vector(te*#"s"t, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DOUBLE' (type: string), _col13 (type: double), _col14 (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out index 2d11b3fca1..6e6ee34361 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out @@ -179,31 +179,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -638,31 +642,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out index 13d7f5a756..23489244bd 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out @@ -183,31 +183,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -832,16 +836,16 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(_c2), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary) Filter Operator predicate: ((_col0 = _col4) and (_col1 = _col5)) (type: boolean) Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE @@ -860,17 +864,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1223,31 +1231,35 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(_c2), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1524,31 +1536,35 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(_c2), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1844,16 +1860,16 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(_c2), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary) Filter Operator predicate: ((_col0 = _col4) and (_col1 = _col5)) (type: boolean) Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE @@ -1872,17 +1888,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out index e2fff711cd..80f4628e1f 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out @@ -300,31 +300,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -575,31 +579,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -806,31 +814,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1041,31 +1053,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out index 0a106ba0a3..d4bf2a6d8b 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out @@ -179,31 +179,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -638,31 +642,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out index 6cce2a45f7..dc0e861863 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out @@ -179,31 +179,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -762,31 +766,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out index 5961735f29..cdcc356ae3 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out @@ -85,31 +85,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -363,31 +367,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -668,33 +676,37 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -889,31 +901,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1242,31 +1258,35 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3258 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out index 25c5aedc12..4d6de9d8f7 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out @@ -86,35 +86,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 55 Data size: 10230 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') keys: col3 (type: double) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 27 Data size: 12960 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 27 Data size: 12960 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 27 Data size: 9288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -985,37 +985,37 @@ STAGE PLANS: outputColumnNames: value, key, partkey Statistics: Num rows: 18 Data size: 3384 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 9 Data size: 4320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 9 Data size: 4320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 9 Data size: 3096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1447,35 +1447,35 @@ STAGE PLANS: outputColumnNames: value, key, partkey Statistics: Num rows: 18 Data size: 3402 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 9 Data size: 4320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 9 Data size: 4320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 9 Data size: 3096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1945,35 +1945,35 @@ STAGE PLANS: outputColumnNames: value, key, partkey Statistics: Num rows: 55 Data size: 10395 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 27 Data size: 12960 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 27 Data size: 12960 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 27 Data size: 9288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2261,35 +2261,35 @@ STAGE PLANS: outputColumnNames: value, key, tes"t, te*#"s"t, partkey Statistics: Num rows: 55 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll'), compute_stats(tes"t, 'hll'), compute_stats(te*#"s"t, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), max(length(key)), avg(COALESCE(length(key),0)), count(key), compute_bit_vector(key, 'hll'), min(tes"t), max(tes"t), count(tes"t), compute_bit_vector(tes"t, 'hll'), min(te*#"s"t), max(te*#"s"t), count(te*#"s"t), compute_bit_vector(te*#"s"t, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 27 Data size: 46872 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 27 Data size: 22032 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 27 Data size: 46872 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 27 Data size: 22032 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 27 Data size: 47736 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 27 Data size: 18360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 27 Data size: 47736 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DOUBLE' (type: string), _col14 (type: double), _col15 (type: double), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 27 Data size: 28944 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 47736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 28944 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out index 85e22c791d..e463b3cba6 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out @@ -62,19 +62,19 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll') keys: col2 (type: string) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 27 Data size: 8829 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 27 Data size: 8829 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -88,18 +88,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -653,19 +653,19 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll') keys: key (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 9 Data size: 2943 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 9 Data size: 2943 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -679,18 +679,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 9 Data size: 2331 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1033,19 +1033,19 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3258 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll') keys: key (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 9 Data size: 2943 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 9 Data size: 2943 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -1057,18 +1057,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 9 Data size: 2331 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out index da6e057636..8bdbf13d8a 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out @@ -62,19 +62,19 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll') keys: col2 (type: string) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 27 Data size: 8829 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 27 Data size: 8829 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -88,18 +88,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out index 26e3856761..580be3f805 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out @@ -328,31 +328,35 @@ STAGE PLANS: outputColumnNames: quartile, total Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll') + aggregations: min(quartile), max(quartile), count(1), count(quartile), compute_bit_vector(quartile, 'hll'), min(total), max(total), count(total), compute_bit_vector(total, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DECIMAL' (type: string), _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -635,31 +639,35 @@ STAGE PLANS: outputColumnNames: quartile, total Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll') + aggregations: min(quartile), max(quartile), count(1), count(quartile), compute_bit_vector(quartile, 'hll'), min(total), max(total), count(total), compute_bit_vector(total, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DECIMAL' (type: string), _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -965,31 +973,35 @@ STAGE PLANS: outputColumnNames: total_views, quartile, program Statistics: Num rows: 6 Data size: 1266 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(total_views, 'hll'), compute_stats(quartile, 'hll'), compute_stats(program, 'hll') + aggregations: min(total_views), max(total_views), count(1), count(total_views), compute_bit_vector(total_views, 'hll'), min(quartile), max(quartile), count(quartile), compute_bit_vector(quartile, 'hll'), max(length(program)), avg(COALESCE(length(program),0)), count(program), compute_bit_vector(program, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(12,1)), _col6 (type: decimal(12,1)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 716 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(12,1)), _col6 (type: decimal(12,1)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/merge1.q.out b/ql/src/test/results/clientpositive/llap/merge1.q.out index ef4378fff7..1a227cb6a7 100644 --- a/ql/src/test/results/clientpositive/llap/merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/merge1.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, val Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(val), max(val), count(val), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -554,33 +558,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -666,33 +674,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/merge2.q.out b/ql/src/test/results/clientpositive/llap/merge2.q.out index ee4681df80..0048aa13eb 100644 --- a/ql/src/test/results/clientpositive/llap/merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/merge2.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: key, val Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(val), max(val), count(val), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -554,33 +558,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -666,33 +674,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/merge3.q.out b/ql/src/test/results/clientpositive/llap/merge3.q.out index 141e40891e..a4f15ff8a1 100644 --- a/ql/src/test/results/clientpositive/llap/merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/merge3.q.out @@ -118,19 +118,19 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -176,34 +176,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2375,12 +2379,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -2388,9 +2392,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -2477,30 +2481,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -4780,30 +4784,30 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1432 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/merge4.q.out b/ql/src/test/results/clientpositive/llap/merge4.q.out index e15a91c038..a57440f68c 100644 --- a/ql/src/test/results/clientpositive/llap/merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/merge4.q.out @@ -56,37 +56,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1196,37 +1196,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2886,35 +2886,35 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 614 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition.q.out index 72db3e800c..6044d9154f 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition.q.out @@ -88,19 +88,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 99 Data size: 49864 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 99 Data size: 136984 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 99 Data size: 96592 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 99 Data size: 136984 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 99 Data size: 96592 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -114,18 +114,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 49 Data size: 45392 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 49 Data size: 45392 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 49 Data size: 45392 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -758,11 +758,11 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -770,21 +770,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 49 Data size: 15664 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 49 Data size: 15664 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -1398,19 +1398,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 99 Data size: 49864 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 99 Data size: 136984 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 99 Data size: 96592 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 99 Data size: 136984 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 99 Data size: 96592 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -1424,18 +1424,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 49 Data size: 45392 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 49 Data size: 45392 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 49 Data size: 65384 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 49 Data size: 45392 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition2.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition2.q.out index 9bf582529b..b5f7fb26c1 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition2.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition2.q.out @@ -107,19 +107,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 297 Data size: 148488 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 297 Data size: 409848 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 297 Data size: 288672 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 297 Data size: 409848 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 297 Data size: 288672 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -133,18 +133,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 148 Data size: 196480 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 148 Data size: 136096 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 148 Data size: 196480 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 148 Data size: 136096 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 148 Data size: 196480 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 148 Data size: 136096 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition3.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition3.q.out index d8b4c0dbd2..4970d781ca 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition3.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition3.q.out @@ -171,19 +171,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 594 Data size: 405536 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 594 Data size: 928256 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 594 Data size: 685904 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 594 Data size: 928256 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 594 Data size: 685904 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -197,18 +197,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 297 Data size: 448304 Basic stats: PARTIAL Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 297 Data size: 327128 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 297 Data size: 448304 Basic stats: PARTIAL Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 297 Data size: 327128 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 297 Data size: 448304 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 297 Data size: 327128 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out index 49b44ac297..07675ee356 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out @@ -175,37 +175,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out index 1b2567894f..eadcc4a64a 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out @@ -151,37 +151,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 618 Data size: 221244 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out b/ql/src/test/results/clientpositive/llap/mm_all.q.out index 193021743a..ab4f308a8b 100644 --- a/ql/src/test/results/clientpositive/llap/mm_all.q.out +++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out @@ -100,37 +100,37 @@ STAGE PLANS: outputColumnNames: key, key_mm Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') keys: key_mm (type: int) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 3 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 3 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 3 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 3 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/multi_insert.q.out b/ql/src/test/results/clientpositive/llap/multi_insert.q.out index 9eff316da0..e924def356 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert.q.out @@ -72,16 +72,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -102,48 +102,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -295,16 +303,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -325,48 +333,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -518,16 +534,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -548,48 +564,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -741,16 +765,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -771,48 +795,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -983,16 +1015,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1014,46 +1046,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1217,16 +1257,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1248,46 +1288,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1451,16 +1499,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1482,46 +1530,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1685,16 +1741,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1716,46 +1772,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1902,16 +1966,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1928,16 +1992,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -1965,16 +2029,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1991,48 +2055,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2207,16 +2279,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2233,16 +2305,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2270,16 +2342,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2296,48 +2368,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2512,16 +2592,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2538,16 +2618,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2575,16 +2655,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2601,48 +2681,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2817,16 +2905,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2843,16 +2931,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2880,16 +2968,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2906,48 +2994,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_gby.q.out index 57a94217b8..8e9ccffc51 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_gby.q.out @@ -98,16 +98,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 105 Data size: 9555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 110 Data size: 9570 Basic stats: COMPLETE Column stats: COMPLETE @@ -134,46 +134,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 105 Data size: 9555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -370,16 +378,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 316 Data size: 28756 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (KEY._col0 > 450) (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE @@ -406,46 +414,54 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 105 Data size: 9555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_gby2.q.out index 42d167ec20..34da2a9a8b 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_gby2.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_gby2.q.out @@ -93,17 +93,21 @@ STAGE PLANS: outputColumnNames: count Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(count, 'hll') + aggregations: min(count), max(count), count(1), count(count), compute_bit_vector(count, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: percentile_approx(VALUE._col0, 0.5) mode: complete @@ -122,17 +126,21 @@ STAGE PLANS: outputColumnNames: percentile Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(percentile, 'hll') + aggregations: min(percentile), max(percentile), count(1), count(percentile), compute_bit_vector(percentile, 'hll') mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DOUBLE' (type: string), _col0 (type: double), _col1 (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_gby3.q.out index 0777184745..647e7c75d5 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_gby3.q.out @@ -129,31 +129,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(keyd), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -180,31 +184,35 @@ STAGE PLANS: outputColumnNames: key, keyd, value Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(keyd), compute_bit_vector(keyd, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -352,31 +360,35 @@ STAGE PLANS: outputColumnNames: key, keyd, value Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(keyd), compute_bit_vector(keyd, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -403,31 +415,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(keyd), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1888,31 +1904,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(keyd), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1939,31 +1959,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(keyd), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2134,31 +2158,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(keyd), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2185,31 +2213,35 @@ STAGE PLANS: outputColumnNames: key, keyd, value Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(keyd), compute_bit_vector(keyd, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -2236,31 +2268,35 @@ STAGE PLANS: outputColumnNames: key, keyd Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(keyd), max(keyd), count(keyd), compute_bit_vector(keyd, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_gby4.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_gby4.q.out index 554a174f5c..62489c6685 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_gby4.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_gby4.q.out @@ -120,16 +120,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 106 Data size: 9646 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE @@ -156,16 +156,16 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 106 Data size: 9646 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (KEY._col0 > 490) (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE @@ -192,61 +192,73 @@ STAGE PLANS: outputColumnNames: key, count Statistics: Num rows: 106 Data size: 9646 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(count), max(count), count(count), compute_bit_vector(count, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out index 43e6b6c65d..9d8c9eb93d 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out @@ -98,16 +98,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 @@ -135,16 +135,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Lateral View Forward Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -171,16 +171,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 @@ -208,48 +208,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -526,31 +534,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -577,31 +589,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -828,31 +844,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -884,16 +904,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 542 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -920,46 +940,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 542 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -1269,31 +1297,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1320,31 +1352,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1371,31 +1407,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 5 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -1742,31 +1782,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1793,31 +1837,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 10 Data size: 2710 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1849,16 +1897,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 550 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE @@ -1885,46 +1933,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 550 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_mixed.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_mixed.q.out index 2bbf32e0a3..5ebff7e0fb 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_mixed.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_mixed.q.out @@ -125,16 +125,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -172,17 +172,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: @@ -218,32 +222,40 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_move_tasks_share_dependencies.q.out index 8a042db312..e806b510ac 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_move_tasks_share_dependencies.q.out @@ -72,16 +72,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -102,48 +102,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -295,16 +303,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -325,48 +333,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -518,16 +534,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -548,48 +564,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -741,16 +765,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -771,48 +795,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -983,16 +1015,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1014,46 +1046,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1217,16 +1257,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1248,46 +1288,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1451,16 +1499,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1482,46 +1530,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1685,16 +1741,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.98630136 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1716,46 +1772,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9583333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1902,16 +1966,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1928,16 +1992,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -1965,16 +2029,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1991,48 +2055,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2207,16 +2279,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2233,16 +2305,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2270,16 +2342,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2296,48 +2368,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2512,16 +2592,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2538,16 +2618,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2575,16 +2655,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2601,48 +2681,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -2817,16 +2905,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2843,16 +2931,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2880,16 +2968,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -2906,48 +2994,56 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -4053,16 +4149,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -4083,16 +4179,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 39338 Basic stats: COMPLETE Column stats: COMPLETE @@ -4108,32 +4204,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -4363,16 +4467,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -4393,16 +4497,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 39338 Basic stats: COMPLETE Column stats: COMPLETE @@ -4418,32 +4522,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -4673,16 +4785,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -4703,16 +4815,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 39338 Basic stats: COMPLETE Column stats: COMPLETE @@ -4728,32 +4840,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -4983,16 +5103,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -5013,16 +5133,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 39338 Basic stats: COMPLETE Column stats: COMPLETE @@ -5038,32 +5158,40 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_union_src.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_union_src.q.out index 335af8f9c6..2a933f772d 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_union_src.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_union_src.q.out @@ -161,17 +161,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 58 Data size: 10324 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -192,17 +196,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 58 Data size: 10324 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_with_join2.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_with_join2.q.out index 4af33dcfe8..4051c6da86 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_with_join2.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_with_join2.q.out @@ -153,31 +153,35 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -299,31 +303,35 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -449,16 +457,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Filter Operator predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -479,46 +487,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -662,16 +678,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -692,46 +708,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -883,16 +907,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -913,46 +937,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1104,16 +1136,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -1134,46 +1166,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1337,16 +1377,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -1367,46 +1407,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1570,16 +1618,16 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE @@ -1600,46 +1648,54 @@ STAGE PLANS: outputColumnNames: ida, vala, idb, valb Statistics: Num rows: 1 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + aggregations: max(length(ida)), avg(COALESCE(length(ida),0)), count(1), count(ida), compute_bit_vector(ida, 'hll'), max(length(vala)), avg(COALESCE(length(vala),0)), count(vala), compute_bit_vector(vala, 'hll'), max(length(idb)), avg(COALESCE(length(idb),0)), count(idb), compute_bit_vector(idb, 'hll'), max(length(valb)), avg(COALESCE(length(valb),0)), count(valb), compute_bit_vector(valb, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/multigroupby_singlemr.q.out b/ql/src/test/results/clientpositive/llap/multigroupby_singlemr.q.out index 998772752c..5a5cbe02db 100644 --- a/ql/src/test/results/clientpositive/llap/multigroupby_singlemr.q.out +++ b/ql/src/test/results/clientpositive/llap/multigroupby_singlemr.q.out @@ -142,31 +142,35 @@ STAGE PLANS: outputColumnNames: d1, d2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 860 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 860 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 892 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 892 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -193,31 +197,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(d3), compute_bit_vector(d3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1332 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1332 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -362,31 +370,35 @@ STAGE PLANS: outputColumnNames: d1, d2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 860 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 860 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 892 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 892 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -413,31 +425,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(d3), compute_bit_vector(d3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1332 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1332 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -582,31 +598,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3, d4 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(d3), compute_bit_vector(d3, 'hll'), min(d4), max(d4), count(d4), compute_bit_vector(d4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -633,31 +653,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(d3), compute_bit_vector(d3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1336 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1336 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -778,16 +802,16 @@ STAGE PLANS: outputColumnNames: d1, d2, d3, d4 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(d3), compute_bit_vector(d3, 'hll'), min(d4), max(d4), count(d4), compute_bit_vector(d4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) @@ -811,46 +835,54 @@ STAGE PLANS: outputColumnNames: d1, d2, d3, d4 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(d3), compute_bit_vector(d3, 'hll'), min(d4), max(d4), count(d4), compute_bit_vector(d4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1021,31 +1053,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3, d4 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(d3), compute_bit_vector(d3, 'hll'), min(d4), max(d4), count(d4), compute_bit_vector(d4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1776 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1072,31 +1108,35 @@ STAGE PLANS: outputColumnNames: d1, d2, d3 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll'), min(d3), max(d3), count(d3), compute_bit_vector(d3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1336 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1336 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1123,31 +1163,35 @@ STAGE PLANS: outputColumnNames: d1, d2 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + aggregations: min(d1), max(d1), count(1), count(d1), compute_bit_vector(d1, 'hll'), min(d2), max(d2), count(d2), compute_bit_vector(d2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 896 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 896 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/nonreserved_keywords_insert_into1.q.out b/ql/src/test/results/clientpositive/llap/nonreserved_keywords_insert_into1.q.out index dda8546f48..4cc19aaa44 100644 --- a/ql/src/test/results/clientpositive/llap/nonreserved_keywords_insert_into1.q.out +++ b/ql/src/test/results/clientpositive/llap/nonreserved_keywords_insert_into1.q.out @@ -80,31 +80,35 @@ STAGE PLANS: outputColumnNames: key, as Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(as)), avg(COALESCE(length(as),0)), count(as), compute_bit_vector(as, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -220,31 +224,35 @@ STAGE PLANS: outputColumnNames: key, as Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(as)), avg(COALESCE(length(as),0)), count(as), compute_bit_vector(as, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -369,31 +377,35 @@ STAGE PLANS: outputColumnNames: key, as Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(as)), avg(COALESCE(length(as),0)), count(as), compute_bit_vector(as, 'hll') minReductionHashAggr: 0.9 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/notable_alias1.q.out b/ql/src/test/results/clientpositive/llap/notable_alias1.q.out index 8973d87b7a..67469639a9 100644 --- a/ql/src/test/results/clientpositive/llap/notable_alias1.q.out +++ b/ql/src/test/results/clientpositive/llap/notable_alias1.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: dummy, key, value Statistics: Num rows: 83 Data size: 8300 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(dummy, 'hll'), compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(dummy)), avg(COALESCE(length(dummy),0)), count(1), count(dummy), compute_bit_vector(dummy, 'hll'), min(key), max(key), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/notable_alias2.q.out b/ql/src/test/results/clientpositive/llap/notable_alias2.q.out index e1b0aa870e..94265151b9 100644 --- a/ql/src/test/results/clientpositive/llap/notable_alias2.q.out +++ b/ql/src/test/results/clientpositive/llap/notable_alias2.q.out @@ -84,31 +84,35 @@ STAGE PLANS: outputColumnNames: dummy, key, value Statistics: Num rows: 83 Data size: 8300 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(dummy, 'hll'), compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(dummy)), avg(COALESCE(length(dummy),0)), count(1), count(dummy), compute_bit_vector(dummy, 'hll'), min(key), max(key), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9879518 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_createas1.q.out b/ql/src/test/results/clientpositive/llap/orc_createas1.q.out index fe86c273b6..b223d753de 100644 --- a/ql/src/test/results/clientpositive/llap/orc_createas1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_createas1.q.out @@ -95,33 +95,37 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -304,33 +308,37 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out index 3748086552..5e40be5421 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out @@ -89,37 +89,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 150250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -227,37 +227,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 150250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -405,37 +405,37 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 150250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out index 82fcd88383..ad9fb69aac 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out @@ -82,19 +82,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -108,18 +108,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 150250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -230,19 +230,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -256,18 +256,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 150250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -423,19 +423,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -449,18 +449,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 150250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out index 4843ad1b3b..cb384b3735 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out @@ -56,19 +56,19 @@ STAGE PLANS: outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: one (type: string), two (type: string), three (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 500 Data size: 426500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 500 Data size: 426500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -82,18 +82,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 500 Data size: 392500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out index 43704b23b5..430ca54bf7 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out index b62276e7f7..b8007f32b8 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out @@ -116,33 +116,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out index 799b7afd53..5a8f2daedb 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out @@ -70,33 +70,37 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(1), count(userid), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(string1), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(subtype), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(decimal1), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(ts), compute_bit_vector(ts, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), _col15 (type: bigint), _col16 (type: binary), _col17 (type: timestamp), _col18 (type: timestamp), _col19 (type: bigint), _col20 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DECIMAL' (type: string), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'TIMESTAMP' (type: string), _col17 (type: timestamp), _col18 (type: timestamp), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -206,33 +210,37 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(1), count(userid), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(string1), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(subtype), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(decimal1), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(ts), compute_bit_vector(ts, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), _col15 (type: bigint), _col16 (type: binary), _col17 (type: timestamp), _col18 (type: timestamp), _col19 (type: bigint), _col20 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DECIMAL' (type: string), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'TIMESTAMP' (type: string), _col17 (type: timestamp), _col18 (type: timestamp), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out index 0b9924680a..417cdd0291 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out @@ -70,11 +70,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(1), count(userid), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(string1), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(subtype), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(decimal1), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(ts), compute_bit_vector(ts, 'hll') keys: year (type: string), hour (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) @@ -82,21 +82,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: binary), _col15 (type: decimal(38,0)), _col16 (type: decimal(38,0)), _col17 (type: bigint), _col18 (type: binary), _col19 (type: timestamp), _col20 (type: timestamp), _col21 (type: bigint), _col22 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: 'LONG' (type: string), _col2 (type: bigint), _col3 (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'DOUBLE' (type: string), _col11 (type: double), _col12 (type: double), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DECIMAL' (type: string), _col15 (type: decimal(38,0)), _col16 (type: decimal(38,0)), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), 'TIMESTAMP' (type: string), _col19 (type: timestamp), _col20 (type: timestamp), (_col4 - _col21) (type: bigint), COALESCE(ndv_compute_bit_vector(_col22),0) (type: bigint), _col22 (type: binary), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -259,11 +259,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(1), count(userid), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(string1), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(subtype), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(decimal1), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(ts), compute_bit_vector(ts, 'hll') keys: year (type: string), hour (type: int) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) @@ -271,21 +271,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: binary), _col15 (type: decimal(38,0)), _col16 (type: decimal(38,0)), _col17 (type: bigint), _col18 (type: binary), _col19 (type: timestamp), _col20 (type: timestamp), _col21 (type: bigint), _col22 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: 'LONG' (type: string), _col2 (type: bigint), _col3 (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), 'DOUBLE' (type: string), _col11 (type: double), _col12 (type: double), (_col4 - _col13) (type: bigint), COALESCE(ndv_compute_bit_vector(_col14),0) (type: bigint), _col14 (type: binary), 'DECIMAL' (type: string), _col15 (type: decimal(38,0)), _col16 (type: decimal(38,0)), (_col4 - _col17) (type: bigint), COALESCE(ndv_compute_bit_vector(_col18),0) (type: bigint), _col18 (type: binary), 'TIMESTAMP' (type: string), _col19 (type: timestamp), _col20 (type: timestamp), (_col4 - _col21) (type: bigint), COALESCE(ndv_compute_bit_vector(_col22),0) (type: bigint), _col22 (type: binary), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out index 5fe669389e..87921d386d 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out @@ -58,11 +58,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, st Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(1), count(userid), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(string1), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(subtype), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(decimal1), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(ts), compute_bit_vector(ts, 'hll') keys: st (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) @@ -70,7 +70,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), _col16 (type: bigint), _col17 (type: binary), _col18 (type: timestamp), _col19 (type: timestamp), _col20 (type: bigint), _col21 (type: binary) Reduce Output Operator key expressions: _col5 (type: double) null sort order: a @@ -84,14 +84,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DECIMAL' (type: string), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), 'TIMESTAMP' (type: string), _col18 (type: timestamp), _col19 (type: timestamp), (_col3 - _col20) (type: bigint), COALESCE(ndv_compute_bit_vector(_col21),0) (type: bigint), _col21 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -295,11 +295,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, st Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(1), count(userid), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(string1), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(subtype), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(decimal1), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(ts), compute_bit_vector(ts, 'hll') keys: st (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) @@ -307,7 +307,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), _col16 (type: bigint), _col17 (type: binary), _col18 (type: timestamp), _col19 (type: timestamp), _col20 (type: bigint), _col21 (type: binary) Reduce Output Operator key expressions: _col5 (type: double) null sort order: a @@ -321,14 +321,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DECIMAL' (type: string), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), 'TIMESTAMP' (type: string), _col18 (type: timestamp), _col19 (type: timestamp), (_col3 - _col20) (type: bigint), COALESCE(ndv_compute_bit_vector(_col21),0) (type: bigint), _col21 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out index 3215f09ac8..6fd9638ee0 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out @@ -82,19 +82,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -108,18 +108,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 150250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -230,19 +230,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -256,18 +256,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 150250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -423,19 +423,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 167250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reduce Output Operator key expressions: _col2 (type: string) null sort order: a @@ -449,18 +449,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 250 Data size: 150250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 199750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out index 83753ee1eb..2c9a2a7e4c 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out @@ -70,33 +70,37 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(1), count(userid), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(string1), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(subtype), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(decimal1), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(ts), compute_bit_vector(ts, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), _col15 (type: bigint), _col16 (type: binary), _col17 (type: timestamp), _col18 (type: timestamp), _col19 (type: bigint), _col20 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'DECIMAL' (type: string), _col13 (type: decimal(38,0)), _col14 (type: decimal(38,0)), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'TIMESTAMP' (type: string), _col17 (type: timestamp), _col18 (type: timestamp), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1536 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out index 54dbc75d86..c7196ff960 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out @@ -58,11 +58,11 @@ STAGE PLANS: outputColumnNames: userid, string1, subtype, decimal1, ts, st Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + aggregations: min(userid), max(userid), count(1), count(userid), compute_bit_vector(userid, 'hll'), max(length(string1)), avg(COALESCE(length(string1),0)), count(string1), compute_bit_vector(string1, 'hll'), min(subtype), max(subtype), count(subtype), compute_bit_vector(subtype, 'hll'), min(decimal1), max(decimal1), count(decimal1), compute_bit_vector(decimal1, 'hll'), min(ts), max(ts), count(ts), compute_bit_vector(ts, 'hll') keys: st (type: double) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) @@ -70,7 +70,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), _col16 (type: bigint), _col17 (type: binary), _col18 (type: timestamp), _col19 (type: timestamp), _col20 (type: bigint), _col21 (type: binary) Reduce Output Operator key expressions: _col5 (type: double) null sort order: a @@ -84,14 +84,14 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'DECIMAL' (type: string), _col14 (type: decimal(38,0)), _col15 (type: decimal(38,0)), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), 'TIMESTAMP' (type: string), _col18 (type: timestamp), _col19 (type: timestamp), (_col3 - _col20) (type: bigint), COALESCE(ndv_compute_bit_vector(_col21),0) (type: bigint), _col21 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/parallel.q.out b/ql/src/test/results/clientpositive/llap/parallel.q.out index 8548d70937..8aa2d03d7f 100644 --- a/ql/src/test/results/clientpositive/llap/parallel.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel.q.out @@ -109,16 +109,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -137,46 +137,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out b/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out index 26aec2577f..07a73191ed 100644 --- a/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out @@ -109,16 +109,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -137,46 +137,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/parallel_join1.q.out b/ql/src/test/results/clientpositive/llap/parallel_join1.q.out index 4a54f24f4e..09cc09f75a 100644 --- a/ql/src/test/results/clientpositive/llap/parallel_join1.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel_join1.q.out @@ -104,31 +104,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/parallel_orderby.q.out b/ql/src/test/results/clientpositive/llap/parallel_orderby.q.out index f49f8f9503..6dfd711895 100644 --- a/ql/src/test/results/clientpositive/llap/parallel_orderby.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel_orderby.q.out @@ -85,17 +85,21 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/partial_column_stats.q.out b/ql/src/test/results/clientpositive/llap/partial_column_stats.q.out index 00dc5406d7..74aaf42903 100644 --- a/ql/src/test/results/clientpositive/llap/partial_column_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/partial_column_stats.q.out @@ -38,33 +38,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 588 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/partition_ctas.q.out b/ql/src/test/results/clientpositive/llap/partition_ctas.q.out index 1700007677..fe1b429699 100644 --- a/ql/src/test/results/clientpositive/llap/partition_ctas.q.out +++ b/ql/src/test/results/clientpositive/llap/partition_ctas.q.out @@ -47,19 +47,19 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll') keys: col2 (type: string) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 27 Data size: 8829 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 27 Data size: 8829 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reduce Output Operator key expressions: _col1 (type: string) null sort order: a @@ -73,18 +73,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/pcr.q.out b/ql/src/test/results/clientpositive/llap/pcr.q.out index d10364c727..423f95a839 100644 --- a/ql/src/test/results/clientpositive/llap/pcr.q.out +++ b/ql/src/test/results/clientpositive/llap/pcr.q.out @@ -3513,19 +3513,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Select Operator expressions: key (type: int), value (type: string) @@ -3562,19 +3562,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.95 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -3624,67 +3624,75 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection @@ -3851,19 +3859,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Filter Operator isSamplingPred: false @@ -3904,19 +3912,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -3966,67 +3974,75 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/ppd_constant_expr.q.out b/ql/src/test/results/clientpositive/llap/ppd_constant_expr.q.out index d527113b73..866b924bd8 100644 --- a/ql/src/test/results/clientpositive/llap/ppd_constant_expr.q.out +++ b/ql/src/test/results/clientpositive/llap/ppd_constant_expr.q.out @@ -54,33 +54,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3 Statistics: Num rows: 25 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -197,33 +201,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3 Statistics: Num rows: 25 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(c2), max(c2), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), _col9 (type: double), _col10 (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/llap/ppd_multi_insert.q.out index 6f0c154f6a..d1b71afb93 100644 --- a/ql/src/test/results/clientpositive/llap/ppd_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/ppd_multi_insert.q.out @@ -142,16 +142,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 263 Data size: 24985 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 87 Data size: 15486 Basic stats: COMPLETE Column stats: COMPLETE @@ -172,16 +172,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 87 Data size: 8265 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9885057 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 87 Data size: 15486 Basic stats: COMPLETE Column stats: COMPLETE @@ -202,19 +202,19 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 87 Data size: 16008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') keys: '2008-04-08' (type: string), '12' (type: string) minReductionHashAggr: 0.9885057 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-04-08' (type: string), '12' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary) Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 263 Data size: 46814 Basic stats: COMPLETE Column stats: COMPLETE @@ -233,48 +233,56 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: '2008-04-08' (type: string), '12' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1555,16 +1563,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 263 Data size: 24985 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 87 Data size: 15486 Basic stats: COMPLETE Column stats: COMPLETE @@ -1585,16 +1593,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 87 Data size: 8265 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9885057 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 87 Data size: 15486 Basic stats: COMPLETE Column stats: COMPLETE @@ -1615,19 +1623,19 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 87 Data size: 16008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll') keys: '2008-04-08' (type: string), '12' (type: string) minReductionHashAggr: 0.9885057 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-04-08' (type: string), '12' (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) - Statistics: Num rows: 1 Data size: 604 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct) + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary) Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 263 Data size: 46814 Basic stats: COMPLETE Column stats: COMPLETE @@ -1646,48 +1654,56 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: '2008-04-08' (type: string), '12' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/ptf.q.out b/ql/src/test/results/clientpositive/llap/ptf.q.out index cd10396a7d..c0816a46bb 100644 --- a/ql/src/test/results/clientpositive/llap/ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/ptf.q.out @@ -3200,31 +3200,35 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, r, dr, s Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(1), count(p_mfgr), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(p_name), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(p_size), compute_bit_vector(p_size, 'hll'), min(r), max(r), count(r), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(dr), compute_bit_vector(dr, 'hll'), min(s), max(s), count(s), compute_bit_vector(s, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: double), _col22 (type: double), _col23 (type: bigint), _col24 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -3328,31 +3332,35 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(s2, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(fv1, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(1), count(p_mfgr), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(p_name), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(p_size), compute_bit_vector(p_size, 'hll'), min(s2), max(s2), count(s2), compute_bit_vector(s2, 'hll'), min(r), max(r), count(r), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(dr), compute_bit_vector(dr, 'hll'), min(cud), max(cud), count(cud), compute_bit_vector(cud, 'hll'), min(fv1), max(fv1), count(fv1), compute_bit_vector(fv1, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: int), _col23 (type: bigint), _col24 (type: binary), _col25 (type: double), _col26 (type: double), _col27 (type: bigint), _col28 (type: binary), _col29 (type: int), _col30 (type: int), _col31 (type: bigint), _col32 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24), min(VALUE._col25), max(VALUE._col26), count(VALUE._col27), compute_bit_vector(VALUE._col28), min(VALUE._col29), max(VALUE._col30), count(VALUE._col31), compute_bit_vector(VALUE._col32) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'LONG' (type: string), UDFToLong(_col21) (type: bigint), UDFToLong(_col22) (type: bigint), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'LONG' (type: string), UDFToLong(_col29) (type: bigint), UDFToLong(_col30) (type: bigint), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47 + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/quote1.q.out b/ql/src/test/results/clientpositive/llap/quote1.q.out index c3867ffa34..c5de7314bd 100644 --- a/ql/src/test/results/clientpositive/llap/quote1.q.out +++ b/ql/src/test/results/clientpositive/llap/quote1.q.out @@ -58,37 +58,37 @@ STAGE PLANS: outputColumnNames: location, type, table Statistics: Num rows: 55 Data size: 10395 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(location, 'hll'), compute_stats(type, 'hll') + aggregations: min(location), max(location), count(1), count(location), compute_bit_vector(location, 'hll'), max(length(type)), avg(COALESCE(length(type),0)), count(type), compute_bit_vector(type, 'hll') keys: table (type: string) minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 426 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/rand_partitionpruner2.q.out b/ql/src/test/results/clientpositive/llap/rand_partitionpruner2.q.out index 13249e91f8..514a1cd7a8 100644 --- a/ql/src/test/results/clientpositive/llap/rand_partitionpruner2.q.out +++ b/ql/src/test/results/clientpositive/llap/rand_partitionpruner2.q.out @@ -84,19 +84,19 @@ STAGE PLANS: outputColumnNames: key, value, hr, ds Statistics: Num rows: 333 Data size: 151848 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(hr), compute_bit_vector(hr, 'hll'), max(length(ds)), avg(COALESCE(length(ds),0)), count(ds), compute_bit_vector(ds, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -185,34 +185,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out b/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out index a680358a46..8426bc1e2b 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out @@ -99,33 +99,37 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out index 59f4acd59b..eb2c9126e9 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out @@ -56,19 +56,19 @@ STAGE PLANS: outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: one (type: string), two (type: string), three (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 500 Data size: 426500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 500 Data size: 426500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary) Reduce Output Operator key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa @@ -82,18 +82,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 500 Data size: 392500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col3) (type: bigint), UDFToLong(_col4) (type: bigint), (_col5 - _col6) (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), (_col5 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 491500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out index 24af5c74d5..83bb0936b9 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out index 5737ac16b1..f35a07f2b8 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out @@ -98,33 +98,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/rcfile_null_value.q.out b/ql/src/test/results/clientpositive/llap/rcfile_null_value.q.out index 52506bfb8c..545f9f9d6a 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_null_value.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_null_value.q.out @@ -178,31 +178,35 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4 Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + aggregations: min(c1), max(c1), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), min(c3), max(c3), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out index 33ebf0cb14..3b60d8a1b1 100644 --- a/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out @@ -136,34 +136,38 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 - directory: hdfs://### HDFS PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Stats Publishing Key Prefix: hdfs://### HDFS PATH ### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 + directory: hdfs://### HDFS PATH ### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + Stats Publishing Key Prefix: hdfs://### HDFS PATH ### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -395,12 +399,12 @@ STAGE PLANS: outputColumnNames: aid, bid, t, ctime, etime, l, et, ds Statistics: Num rows: 1 Data size: 462 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(aid, 'hll'), compute_stats(bid, 'hll'), compute_stats(t, 'hll'), compute_stats(ctime, 'hll'), compute_stats(etime, 'hll'), compute_stats(l, 'hll'), compute_stats(et, 'hll') + aggregations: max(length(aid)), avg(COALESCE(length(aid),0)), count(1), count(aid), compute_bit_vector(aid, 'hll'), max(length(bid)), avg(COALESCE(length(bid),0)), count(bid), compute_bit_vector(bid, 'hll'), min(t), max(t), count(t), compute_bit_vector(t, 'hll'), max(length(ctime)), avg(COALESCE(length(ctime),0)), count(ctime), compute_bit_vector(ctime, 'hll'), min(etime), max(etime), count(etime), compute_bit_vector(etime, 'hll'), max(length(l)), avg(COALESCE(length(l),0)), count(l), compute_bit_vector(l, 'hll'), max(length(et)), avg(COALESCE(length(et),0)), count(et), compute_bit_vector(et, 'hll') keys: '2010-03-29' (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3142 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator bucketingVersion: 2 key expressions: '2010-03-29' (type: string) @@ -408,39 +412,39 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: '2010-03-29' (type: string) - Statistics: Num rows: 1 Data size: 3142 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary), _col14 (type: int), _col15 (type: struct), _col16 (type: bigint), _col17 (type: binary), _col18 (type: bigint), _col19 (type: bigint), _col20 (type: bigint), _col21 (type: binary), _col22 (type: int), _col23 (type: struct), _col24 (type: bigint), _col25 (type: binary), _col26 (type: int), _col27 (type: struct), _col28 (type: bigint), _col29 (type: binary) auto parallelism: true Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), max(VALUE._col21), avg(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24), max(VALUE._col25), avg(VALUE._col26), count(VALUE._col27), compute_bit_vector(VALUE._col28) keys: '2010-03-29' (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1250 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), '2010-03-29' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col14,0)) (type: bigint), COALESCE(_col15,0) (type: double), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), 'LONG' (type: string), _col18 (type: bigint), _col19 (type: bigint), (_col3 - _col20) (type: bigint), COALESCE(ndv_compute_bit_vector(_col21),0) (type: bigint), _col21 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col22,0)) (type: bigint), COALESCE(_col23,0) (type: double), (_col3 - _col24) (type: bigint), COALESCE(ndv_compute_bit_vector(_col25),0) (type: bigint), _col25 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col26,0)) (type: bigint), COALESCE(_col27,0) (type: double), (_col3 - _col28) (type: bigint), COALESCE(ndv_compute_bit_vector(_col29),0) (type: bigint), _col29 (type: binary), '2010-03-29' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 directory: hdfs://### HDFS PATH ### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: PARTIAL Stats Publishing Key Prefix: hdfs://### HDFS PATH ### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 - columns.types struct:struct:struct:struct:struct:struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24,_col25,_col26,_col27,_col28,_col29,_col30,_col31,_col32,_col33,_col34,_col35,_col36,_col37,_col38,_col39,_col40,_col41,_col42 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/sample1.q.out b/ql/src/test/results/clientpositive/llap/sample1.q.out index b257edc45c..19c91a057c 100644 --- a/ql/src/test/results/clientpositive/llap/sample1.q.out +++ b/ql/src/test/results/clientpositive/llap/sample1.q.out @@ -82,19 +82,19 @@ STAGE PLANS: outputColumnNames: key, value, dt, hr Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(dt, 'hll'), compute_stats(hr, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll'), max(length(dt)), avg(COALESCE(length(dt),0)), count(dt), compute_bit_vector(dt, 'hll'), max(length(hr)), avg(COALESCE(length(hr),0)), count(hr), compute_bit_vector(hr, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -145,34 +145,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1062 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sample5.q.out b/ql/src/test/results/clientpositive/llap/sample5.q.out index e870e8c9b2..43361b17d4 100644 --- a/ql/src/test/results/clientpositive/llap/sample5.q.out +++ b/ql/src/test/results/clientpositive/llap/sample5.q.out @@ -79,19 +79,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -140,34 +140,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sample6.q.out b/ql/src/test/results/clientpositive/llap/sample6.q.out index 71141024c2..4738af70fa 100644 --- a/ql/src/test/results/clientpositive/llap/sample6.q.out +++ b/ql/src/test/results/clientpositive/llap/sample6.q.out @@ -78,19 +78,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -139,34 +139,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sample7.q.out b/ql/src/test/results/clientpositive/llap/sample7.q.out index e217faa782..24114acd85 100644 --- a/ql/src/test/results/clientpositive/llap/sample7.q.out +++ b/ql/src/test/results/clientpositive/llap/sample7.q.out @@ -80,19 +80,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 400 Data size: 38000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -141,34 +141,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index 10e3ba040a..9c502dc5da 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -3567,31 +3567,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3918,31 +3922,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out index fcae6cae9a..1c64bd6d0e 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out @@ -494,31 +494,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4, col5, col6 Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll'), compute_stats(col6, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll'), max(length(col4)), avg(COALESCE(length(col4),0)), count(col4), compute_bit_vector(col4, 'hll'), min(col5), max(col5), count(col5), compute_bit_vector(col5, 'hll'), min(col6), max(col6), count(col6), compute_bit_vector(col6, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: binary), _col21 (type: double), _col22 (type: double), _col23 (type: bigint), _col24 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'DOUBLE' (type: string), _col17 (type: double), _col18 (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/show_functions.q.out b/ql/src/test/results/clientpositive/llap/show_functions.q.out index 36c868d513..7ddf5f91fe 100644 --- a/ql/src/test/results/clientpositive/llap/show_functions.q.out +++ b/ql/src/test/results/clientpositive/llap/show_functions.q.out @@ -72,6 +72,7 @@ chr coalesce collect_list collect_set +compute_bit_vector compute_stats concat concat_ws @@ -259,6 +260,7 @@ month months_between murmur_hash named_struct +ndv_compute_bit_vector negative next_day ngrams @@ -425,6 +427,7 @@ chr coalesce collect_list collect_set +compute_bit_vector compute_stats concat concat_ws @@ -581,6 +584,7 @@ chr coalesce collect_list collect_set +compute_bit_vector compute_stats concat concat_ws @@ -768,6 +772,7 @@ month months_between murmur_hash named_struct +ndv_compute_bit_vector negative next_day ngrams diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out index 175219991c..c99b7debc8 100644 --- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out +++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out @@ -405,31 +405,35 @@ STAGE PLANS: outputColumnNames: category, _c1 Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(category, 'hll'), compute_stats(_c1, 'hll') + aggregations: max(length(category)), avg(COALESCE(length(category),0)), count(1), count(category), compute_bit_vector(category, 'hll'), max(length(_c1)), avg(COALESCE(length(_c1),0)), count(_c1) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'BINARY' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out index d9b72b079b..2b8b8b9725 100644 --- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out +++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out @@ -416,31 +416,35 @@ STAGE PLANS: outputColumnNames: category, _c1, _c2 Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(category, 'hll'), compute_stats(_c1, 'hll'), compute_stats(_c2, 'hll') + aggregations: max(length(category)), avg(COALESCE(length(category),0)), count(1), count(category), compute_bit_vector(category, 'hll'), max(length(_c1)), avg(COALESCE(length(_c1),0)), count(_c1), min(_c2), max(_c2), count(_c2), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'BINARY' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), (_col2 - _col10) (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out index 126aee61cb..ff079587fd 100644 --- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out +++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out @@ -113,31 +113,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -279,31 +283,35 @@ STAGE PLANS: outputColumnNames: _c0, category, _c2 Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(_c0, 'hll'), compute_stats(category, 'hll'), compute_stats(_c2, 'hll') + aggregations: max(length(_c0)), avg(COALESCE(length(_c0),0)), count(1), count(_c0), compute_bit_vector(_c0, 'hll'), max(length(category)), avg(COALESCE(length(category),0)), count(category), compute_bit_vector(category, 'hll'), min(_c2), max(_c2), count(_c2), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/skewjoin.q.out b/ql/src/test/results/clientpositive/llap/skewjoin.q.out index c8b827d50b..5ed8d62a3b 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin.q.out @@ -168,31 +168,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/skewjoin_noskew.q.out b/ql/src/test/results/clientpositive/llap/skewjoin_noskew.q.out index 8929b61d53..170f04c3a4 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin_noskew.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin_noskew.q.out @@ -117,31 +117,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 30 Data size: 5340 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.96666664 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/skewjoin_onesideskew.q.out b/ql/src/test/results/clientpositive/llap/skewjoin_onesideskew.q.out index c25ab387b4..ec2bed5879 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin_onesideskew.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin_onesideskew.q.out @@ -169,31 +169,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 3 Data size: 522 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out index 80a9888ca8..0a8d50e206 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out @@ -290,35 +290,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out index c1efe543af..b78aa11bf2 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out @@ -129,32 +129,36 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 825 Data size: 156750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(v1), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(v2), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1336,32 +1340,36 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 825 Data size: 156750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(v1), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(v2), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2559,32 +2567,36 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(v1), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(v2), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2698,32 +2710,36 @@ STAGE PLANS: outputColumnNames: k1, v1, k2, v2 Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(v1)), avg(COALESCE(length(v1),0)), count(v1), compute_bit_vector(v1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll'), max(length(v2)), avg(COALESCE(length(v2),0)), count(v2), compute_bit_vector(v2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 8384a6c851..0d6f38a05d 100644 --- a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -215,31 +215,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -460,31 +464,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out index d40aa94387..20fbb29cd2 100644 --- a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out @@ -269,31 +269,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -815,31 +819,35 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1119,31 +1127,35 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1423,31 +1435,35 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1727,31 +1743,35 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2098,31 +2118,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -2431,31 +2455,35 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/stats0.q.out b/ql/src/test/results/clientpositive/llap/stats0.q.out index 2b08b61c17..6be81cfce3 100644 --- a/ql/src/test/results/clientpositive/llap/stats0.q.out +++ b/ql/src/test/results/clientpositive/llap/stats0.q.out @@ -75,19 +75,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -133,34 +133,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -782,37 +786,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 131500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 421 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1493,19 +1497,19 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -1551,34 +1555,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -2200,37 +2208,37 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 131500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 557 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 421 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 617 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/stats1.q.out b/ql/src/test/results/clientpositive/llap/stats1.q.out index e1c38ed13a..25367f5a31 100644 --- a/ql/src/test/results/clientpositive/llap/stats1.q.out +++ b/ql/src/test/results/clientpositive/llap/stats1.q.out @@ -84,16 +84,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -121,31 +121,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/stats10.q.out b/ql/src/test/results/clientpositive/llap/stats10.q.out index 5e0fb15365..58b83994f4 100644 --- a/ql/src/test/results/clientpositive/llap/stats10.q.out +++ b/ql/src/test/results/clientpositive/llap/stats10.q.out @@ -71,35 +71,35 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/stats11.q.out b/ql/src/test/results/clientpositive/llap/stats11.q.out index b69b8e2c79..40836268c5 100644 --- a/ql/src/test/results/clientpositive/llap/stats11.q.out +++ b/ql/src/test/results/clientpositive/llap/stats11.q.out @@ -518,53 +518,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -932,53 +936,57 @@ STAGE PLANS: outputColumnNames: key, value1, value2 Statistics: Num rows: 134 Data size: 20826 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 704 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/stats4.q.out b/ql/src/test/results/clientpositive/llap/stats4.q.out index 2a6ca49af9..a53ea2e13b 100644 --- a/ql/src/test/results/clientpositive/llap/stats4.q.out +++ b/ql/src/test/results/clientpositive/llap/stats4.q.out @@ -96,19 +96,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE @@ -129,37 +129,37 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: '2008-12-31' (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '2008-12-31' (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 2 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -168,18 +168,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: '2008-12-31' (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 2 Data size: 1228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/stats_empty_dyn_part.q.out b/ql/src/test/results/clientpositive/llap/stats_empty_dyn_part.q.out index d9bbe8a572..3053999a8a 100644 --- a/ql/src/test/results/clientpositive/llap/stats_empty_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_empty_dyn_part.q.out @@ -53,37 +53,37 @@ STAGE PLANS: outputColumnNames: key, part Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll') keys: part (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 331 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 331 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 263 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 531 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/stats_nonpart.q.out b/ql/src/test/results/clientpositive/llap/stats_nonpart.q.out index b54c2628b0..1dda9463b5 100644 --- a/ql/src/test/results/clientpositive/llap/stats_nonpart.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_nonpart.q.out @@ -153,33 +153,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/stats_sizebug.q.out b/ql/src/test/results/clientpositive/llap/stats_sizebug.q.out index c160d085f7..473adabfc6 100644 --- a/ql/src/test/results/clientpositive/llap/stats_sizebug.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_sizebug.q.out @@ -91,33 +91,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/subquery_multiinsert.q.out index 5c5e5c3da9..51a630ee30 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multiinsert.q.out @@ -231,17 +231,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -266,31 +270,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 131 Data size: 23318 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: @@ -628,16 +636,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 131 Data size: 23318 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Map Join Operator condition map: Left Semi Join 0 to 1 @@ -741,17 +749,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -772,17 +784,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/tablevalues.q.out b/ql/src/test/results/clientpositive/llap/tablevalues.q.out index b936fa3e4b..9c70c78d34 100644 --- a/ql/src/test/results/clientpositive/llap/tablevalues.q.out +++ b/ql/src/test/results/clientpositive/llap/tablevalues.q.out @@ -79,33 +79,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out index e52adde9cd..84cfe57665 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out @@ -186,33 +186,37 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: float), _col10 (type: float), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col9) (type: double), UDFToDouble(_col10) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -258,19 +262,19 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + aggregations: max(length(sourceip)), avg(COALESCE(length(sourceip),0)), count(1), count(sourceip), compute_bit_vector(sourceip, 'hll'), min(avgtimeonsite), max(avgtimeonsite), count(avgtimeonsite), compute_bit_vector(avgtimeonsite, 'hll'), min(adrevenue), max(adrevenue), count(adrevenue), compute_bit_vector(adrevenue, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator bucketingVersion: 2 null sort order: numBuckets: -1 sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: float), _col10 (type: float), _col11 (type: bigint), _col12 (type: binary) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -320,34 +324,38 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col9) (type: double), UDFToDouble(_col10) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary:string:double:double:bigint:bigint:binary + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Stats Work @@ -542,33 +550,37 @@ STAGE PLANS: outputColumnNames: a, b, c, d, e Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') + aggregations: min(a), max(a), count(1), count(a), compute_bit_vector(a, 'hll'), min(b), max(b), count(b), compute_bit_vector(b, 'hll'), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector(c, 'hll'), count(CASE WHEN (d is true) THEN (1) ELSE (null) END), count(CASE WHEN (d is false) THEN (1) ELSE (null) END), count(d), max(length(e)), avg(COALESCE(length(e),0)), count(e) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: int), _col17 (type: struct), _col18 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), count(VALUE._col13), count(VALUE._col14), count(VALUE._col15), max(VALUE._col16), avg(VALUE._col17), count(VALUE._col18) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'BOOLEAN' (type: string), _col13 (type: bigint), _col14 (type: bigint), (_col2 - _col15) (type: bigint), 'BINARY' (type: string), UDFToLong(COALESCE(_col16,0)) (type: bigint), COALESCE(_col17,0) (type: double), (_col2 - _col18) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work diff --git a/ql/src/test/results/clientpositive/llap/temp_table_insert1_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/llap/temp_table_insert1_overwrite_partitions.q.out index 0a92907e29..b3df036388 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_insert1_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_insert1_overwrite_partitions.q.out @@ -106,11 +106,11 @@ STAGE PLANS: outputColumnNames: one, two, ds, hr Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(1), count(one), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(two), compute_bit_vector(two, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -118,19 +118,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -450,11 +450,11 @@ STAGE PLANS: outputColumnNames: one, two, ds, hr Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(1), count(one), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(two), compute_bit_vector(two, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -462,19 +462,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 5 Data size: 1595 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 2 Data size: 638 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/temp_table_insert2_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/llap/temp_table_insert2_overwrite_partitions.q.out index e56f9ebbea..c5a06e4855 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_insert2_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_insert2_overwrite_partitions.q.out @@ -117,11 +117,11 @@ STAGE PLANS: outputColumnNames: one, two, ds Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(1), count(one), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(two), compute_bit_vector(two, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -129,19 +129,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -277,11 +277,11 @@ STAGE PLANS: outputColumnNames: one, two, ds Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + aggregations: max(length(one)), avg(COALESCE(length(one),0)), count(1), count(one), compute_bit_vector(one, 'hll'), max(length(two)), avg(COALESCE(length(two),0)), count(two), compute_bit_vector(two, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -289,19 +289,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 1630 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2 Data size: 652 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/temp_table_merge_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/temp_table_merge_dynamic_partition.q.out index abb4f779eb..a822699b29 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_merge_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_merge_dynamic_partition.q.out @@ -715,11 +715,11 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -727,21 +727,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 99 Data size: 31648 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 49 Data size: 15664 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 49 Data size: 15664 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/tez_dml.q.out b/ql/src/test/results/clientpositive/llap/tez_dml.q.out index bdc6ee6f7b..b5c14caa8c 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dml.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dml.q.out @@ -84,17 +84,21 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -501,37 +505,37 @@ STAGE PLANS: outputColumnNames: c, d Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c, 'hll') + aggregations: max(length(c)), avg(COALESCE(length(c),0)), count(1), count(c), compute_bit_vector(c, 'hll') keys: d (type: int) minReductionHashAggr: 0.98381877 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 5 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -978,16 +982,16 @@ STAGE PLANS: outputColumnNames: c, d Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c, 'hll'), compute_stats(d, 'hll') + aggregations: min(c), max(c), count(1), count(c), compute_bit_vector(c, 'hll'), max(length(d)), avg(COALESCE(length(d),0)), count(d), compute_bit_vector(d, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Filter Operator predicate: ((key % 2) = 1) (type: boolean) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE @@ -1008,48 +1012,56 @@ STAGE PLANS: outputColumnNames: c, d Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c, 'hll'), compute_stats(d, 'hll') + aggregations: min(c), max(c), count(1), count(c), compute_bit_vector(c, 'hll'), max(length(d)), avg(COALESCE(length(d),0)), count(d), compute_bit_vector(d, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out index 8eb76b87ac..62547749b6 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out @@ -89,19 +89,19 @@ STAGE PLANS: outputColumnNames: id1, part1 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id1, 'hll') + aggregations: min(id1), max(id1), count(1), count(id1), compute_bit_vector(id1, 'hll') keys: part1 (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -130,37 +130,37 @@ STAGE PLANS: outputColumnNames: id1, part1 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id1, 'hll') + aggregations: min(id1), max(id1), count(1), count(id1), compute_bit_vector(id1, 'hll') keys: part1 (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out index eac9094739..b5512f7ad7 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out @@ -102,19 +102,19 @@ STAGE PLANS: outputColumnNames: id1, part1 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id1, 'hll') + aggregations: min(id1), max(id1), count(1), count(id1), compute_bit_vector(id1, 'hll') keys: part1 (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -143,37 +143,37 @@ STAGE PLANS: outputColumnNames: id1, part1 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(id1, 'hll') + aggregations: min(id1), max(id1), count(1), count(id1), compute_bit_vector(id1, 'hll') keys: part1 (type: string) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out index 8b7e705955..a3cb8463ac 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out @@ -226,31 +226,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 85952 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -277,31 +281,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -1187,31 +1195,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 85952 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -1238,31 +1250,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2182,31 +2198,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 316 Data size: 85952 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -2233,31 +2253,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3169,31 +3193,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -3220,31 +3248,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -4102,16 +4134,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -4138,46 +4170,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 125 Data size: 34000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/udf1.q.out b/ql/src/test/results/clientpositive/llap/udf1.q.out index 05e23f0068..165207338d 100644 --- a/ql/src/test/results/clientpositive/llap/udf1.q.out +++ b/ql/src/test/results/clientpositive/llap/udf1.q.out @@ -76,33 +76,37 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 Statistics: Num rows: 250 Data size: 442000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll'), compute_stats(c12, 'hll'), compute_stats(c13, 'hll'), compute_stats(c14, 'hll'), compute_stats(c15, 'hll'), compute_stats(c16, 'hll'), compute_stats(c17, 'hll'), compute_stats(c18, 'hll'), compute_stats(c19, 'hll'), compute_stats(c20, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), max(length(c3)), avg(COALESCE(length(c3),0)), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll'), max(length(c5)), avg(COALESCE(length(c5),0)), count(c5), compute_bit_vector(c5, 'hll'), max(length(c6)), avg(COALESCE(length(c6),0)), count(c6), compute_bit_vector(c6, 'hll'), max(length(c7)), avg(COALESCE(length(c7),0)), count(c7), compute_bit_vector(c7, 'hll'), max(length(c8)), avg(COALESCE(length(c8),0)), count(c8), compute_bit_vector(c8, 'hll'), max(length(c9)), avg(COALESCE(length(c9),0)), count(c9), compute_bit_vector(c9, 'hll'), max(length(c10)), avg(COALESCE(length(c10),0)), count(c10), compute_bit_vector(c10, 'hll'), max(length(c11)), avg(COALESCE(length(c11),0)), count(c11), compute_bit_vector(c11, 'hll'), max(length(c12)), avg(COALESCE(length(c12),0)), count(c12), compute_bit_vector(c12, 'hll'), max(length(c13)), avg(COALESCE(length(c13),0)), count(c13), compute_bit_vector(c13, 'hll'), max(length(c14)), avg(COALESCE(length(c14),0)), count(c14), compute_bit_vector(c14, 'hll'), max(length(c15)), avg(COALESCE(length(c15),0)), count(c15), compute_bit_vector(c15, 'hll'), max(length(c16)), avg(COALESCE(length(c16),0)), count(c16), compute_bit_vector(c16, 'hll'), max(length(c17)), avg(COALESCE(length(c17),0)), count(c17), compute_bit_vector(c17, 'hll'), max(length(c18)), avg(COALESCE(length(c18),0)), count(c18), compute_bit_vector(c18, 'hll'), max(length(c19)), avg(COALESCE(length(c19),0)), count(c19), compute_bit_vector(c19, 'hll'), max(length(c20)), avg(COALESCE(length(c20),0)), count(c20), compute_bit_vector(c20, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65, _col66, _col67, _col68, _col69, _col70, _col71, _col72, _col73, _col74, _col75, _col76, _col77, _col78, _col79, _col80 + Statistics: Num rows: 1 Data size: 4648 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: struct), _col16 (type: struct), _col17 (type: struct), _col18 (type: struct), _col19 (type: struct) + Statistics: Num rows: 1 Data size: 4648 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: struct), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: struct), _col23 (type: bigint), _col24 (type: binary), _col25 (type: int), _col26 (type: struct), _col27 (type: bigint), _col28 (type: binary), _col29 (type: int), _col30 (type: struct), _col31 (type: bigint), _col32 (type: binary), _col33 (type: int), _col34 (type: struct), _col35 (type: bigint), _col36 (type: binary), _col37 (type: int), _col38 (type: struct), _col39 (type: bigint), _col40 (type: binary), _col41 (type: int), _col42 (type: struct), _col43 (type: bigint), _col44 (type: binary), _col45 (type: int), _col46 (type: struct), _col47 (type: bigint), _col48 (type: binary), _col49 (type: int), _col50 (type: struct), _col51 (type: bigint), _col52 (type: binary), _col53 (type: int), _col54 (type: struct), _col55 (type: bigint), _col56 (type: binary), _col57 (type: int), _col58 (type: struct), _col59 (type: bigint), _col60 (type: binary), _col61 (type: int), _col62 (type: struct), _col63 (type: bigint), _col64 (type: binary), _col65 (type: int), _col66 (type: struct), _col67 (type: bigint), _col68 (type: binary), _col69 (type: int), _col70 (type: struct), _col71 (type: bigint), _col72 (type: binary), _col73 (type: int), _col74 (type: struct), _col75 (type: bigint), _col76 (type: binary), _col77 (type: int), _col78 (type: struct), _col79 (type: bigint), _col80 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8), compute_stats(VALUE._col9), compute_stats(VALUE._col10), compute_stats(VALUE._col11), compute_stats(VALUE._col12), compute_stats(VALUE._col13), compute_stats(VALUE._col14), compute_stats(VALUE._col15), compute_stats(VALUE._col16), compute_stats(VALUE._col17), compute_stats(VALUE._col18), compute_stats(VALUE._col19) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), max(VALUE._col17), avg(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), max(VALUE._col21), avg(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24), max(VALUE._col25), avg(VALUE._col26), count(VALUE._col27), compute_bit_vector(VALUE._col28), max(VALUE._col29), avg(VALUE._col30), count(VALUE._col31), compute_bit_vector(VALUE._col32), max(VALUE._col33), avg(VALUE._col34), count(VALUE._col35), compute_bit_vector(VALUE._col36), max(VALUE._col37), avg(VALUE._col38), count(VALUE._col39), compute_bit_vector(VALUE._col40), max(VALUE._col41), avg(VALUE._col42), count(VALUE._col43), compute_bit_vector(VALUE._col44), max(VALUE._col45), avg(VALUE._col46), count(VALUE._col47), compute_bit_vector(VALUE._col48), max(VALUE._col49), avg(VALUE._col50), count(VALUE._col51), compute_bit_vector(VALUE._col52), max(VALUE._col53), avg(VALUE._col54), count(VALUE._col55), compute_bit_vector(VALUE._col56), max(VALUE._col57), avg(VALUE._col58), count(VALUE._col59), compute_bit_vector(VALUE._col60), max(VALUE._col61), avg(VALUE._col62), count(VALUE._col63), compute_bit_vector(VALUE._col64), max(VALUE._col65), avg(VALUE._col66), count(VALUE._col67), compute_bit_vector(VALUE._col68), max(VALUE._col69), avg(VALUE._col70), count(VALUE._col71), compute_bit_vector(VALUE._col72), max(VALUE._col73), avg(VALUE._col74), count(VALUE._col75), compute_bit_vector(VALUE._col76), max(VALUE._col77), avg(VALUE._col78), count(VALUE._col79), compute_bit_vector(VALUE._col80) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65, _col66, _col67, _col68, _col69, _col70, _col71, _col72, _col73, _col74, _col75, _col76, _col77, _col78, _col79, _col80 + Statistics: Num rows: 1 Data size: 3288 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col17,0)) (type: bigint), COALESCE(_col18,0) (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col21,0)) (type: bigint), COALESCE(_col22,0) (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col25,0)) (type: bigint), COALESCE(_col26,0) (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col29,0)) (type: bigint), COALESCE(_col30,0) (type: double), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col33,0)) (type: bigint), COALESCE(_col34,0) (type: double), (_col2 - _col35) (type: bigint), COALESCE(ndv_compute_bit_vector(_col36),0) (type: bigint), _col36 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col37,0)) (type: bigint), COALESCE(_col38,0) (type: double), (_col2 - _col39) (type: bigint), COALESCE(ndv_compute_bit_vector(_col40),0) (type: bigint), _col40 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col41,0)) (type: bigint), COALESCE(_col42,0) (type: double), (_col2 - _col43) (type: bigint), COALESCE(ndv_compute_bit_vector(_col44),0) (type: bigint), _col44 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col45,0)) (type: bigint), COALESCE(_col46,0) (type: double), (_col2 - _col47) (type: bigint), COALESCE(ndv_compute_bit_vector(_col48),0) (type: bigint), _col48 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col49,0)) (type: bigint), COALESCE(_col50,0) (type: double), (_col2 - _col51) (type: bigint), COALESCE(ndv_compute_bit_vector(_col52),0) (type: bigint), _col52 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col53,0)) (type: bigint), COALESCE(_col54,0) (type: double), (_col2 - _col55) (type: bigint), COALESCE(ndv_compute_bit_vector(_col56),0) (type: bigint), _col56 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col57,0)) (type: bigint), COALESCE(_col58,0) (type: double), (_col2 - _col59) (type: bigint), COALESCE(ndv_compute_bit_vector(_col60),0) (type: bigint), _col60 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col61,0)) (type: bigint), COALESCE(_col62,0) (type: double), (_col2 - _col63) (type: bigint), COALESCE(ndv_compute_bit_vector(_col64),0) (type: bigint), _col64 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col65,0)) (type: bigint), COALESCE(_col66,0) (type: double), (_col2 - _col67) (type: bigint), COALESCE(ndv_compute_bit_vector(_col68),0) (type: bigint), _col68 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col69,0)) (type: bigint), COALESCE(_col70,0) (type: double), (_col2 - _col71) (type: bigint), COALESCE(ndv_compute_bit_vector(_col72),0) (type: bigint), _col72 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col73,0)) (type: bigint), COALESCE(_col74,0) (type: double), (_col2 - _col75) (type: bigint), COALESCE(ndv_compute_bit_vector(_col76),0) (type: bigint), _col76 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col77,0)) (type: bigint), COALESCE(_col78,0) (type: double), (_col2 - _col79) (type: bigint), COALESCE(ndv_compute_bit_vector(_col80),0) (type: bigint), _col80 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62, _col63, _col64, _col65, _col66, _col67, _col68, _col69, _col70, _col71, _col72, _col73, _col74, _col75, _col76, _col77, _col78, _col79, _col80, _col81, _col82, _col83, _col84, _col85, _col86, _col87, _col88, _col89, _col90, _col91, _col92, _col93, _col94, _col95, _col96, _col97, _col98, _col99, _col100, _col101, _col102, _col103, _col104, _col105, _col106, _col107, _col108, _col109, _col110, _col111, _col112, _col113, _col114, _col115, _col116, _col117, _col118, _col119 + Statistics: Num rows: 1 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf3.q.out b/ql/src/test/results/clientpositive/llap/udf3.q.out index a3c993b781..b3946c30da 100644 --- a/ql/src/test/results/clientpositive/llap/udf3.q.out +++ b/ql/src/test/results/clientpositive/llap/udf3.q.out @@ -77,17 +77,21 @@ STAGE PLANS: outputColumnNames: c1, c2, c3, c4, c5 Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), max(length(c2)), avg(COALESCE(length(c2),0)), count(c2), compute_bit_vector(c2, 'hll'), max(length(c3)), avg(COALESCE(length(c3),0)), count(c3), compute_bit_vector(c3, 'hll'), max(length(c4)), avg(COALESCE(length(c4),0)), count(c4), compute_bit_vector(c4, 'hll'), max(length(c5)), avg(COALESCE(length(c5),0)), count(c5), compute_bit_vector(c5, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 828 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col13,0)) (type: bigint), COALESCE(_col14,0) (type: double), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col17,0)) (type: bigint), COALESCE(_col18,0) (type: double), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 1 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_10_trims.q.out b/ql/src/test/results/clientpositive/llap/udf_10_trims.q.out index ddf22640a1..f1ddf4df3e 100644 --- a/ql/src/test/results/clientpositive/llap/udf_10_trims.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_10_trims.q.out @@ -62,33 +62,37 @@ STAGE PLANS: outputColumnNames: c1 Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_character_length.q.out b/ql/src/test/results/clientpositive/llap/udf_character_length.q.out index e4c22c6567..5728478da8 100644 --- a/ql/src/test/results/clientpositive/llap/udf_character_length.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_character_length.q.out @@ -82,33 +82,37 @@ STAGE PLANS: outputColumnNames: len Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(len, 'hll') + aggregations: min(len), max(len), count(1), count(len), compute_bit_vector(len, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_length.q.out b/ql/src/test/results/clientpositive/llap/udf_length.q.out index 7a5bfba7a9..a2c8382d35 100644 --- a/ql/src/test/results/clientpositive/llap/udf_length.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_length.q.out @@ -65,33 +65,37 @@ STAGE PLANS: outputColumnNames: len Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(len, 'hll') + aggregations: min(len), max(len), count(1), count(len), compute_bit_vector(len, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_octet_length.q.out b/ql/src/test/results/clientpositive/llap/udf_octet_length.q.out index f0117794f6..ed201c30c6 100644 --- a/ql/src/test/results/clientpositive/llap/udf_octet_length.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_octet_length.q.out @@ -65,33 +65,37 @@ STAGE PLANS: outputColumnNames: len Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(len, 'hll') + aggregations: min(len), max(len), count(1), count(len), compute_bit_vector(len, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/udf_reverse.q.out b/ql/src/test/results/clientpositive/llap/udf_reverse.q.out index 03e3a2126c..a07a862a53 100644 --- a/ql/src/test/results/clientpositive/llap/udf_reverse.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_reverse.q.out @@ -65,33 +65,37 @@ STAGE PLANS: outputColumnNames: len Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(len, 'hll') + aggregations: max(length(len)), avg(COALESCE(length(len),0)), count(1), count(len), compute_bit_vector(len, 'hll') minReductionHashAggr: 0.96 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/union10.q.out b/ql/src/test/results/clientpositive/llap/union10.q.out index d139bea3a8..41292b8bed 100644 --- a/ql/src/test/results/clientpositive/llap/union10.q.out +++ b/ql/src/test/results/clientpositive/llap/union10.q.out @@ -132,31 +132,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -186,16 +190,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -225,16 +229,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union12.q.out b/ql/src/test/results/clientpositive/llap/union12.q.out index 480848068a..63b9e8dea4 100644 --- a/ql/src/test/results/clientpositive/llap/union12.q.out +++ b/ql/src/test/results/clientpositive/llap/union12.q.out @@ -136,31 +136,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -190,16 +194,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -229,16 +233,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.6666666 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union17.q.out b/ql/src/test/results/clientpositive/llap/union17.q.out index 1b32388013..ecbf3d1b0b 100644 --- a/ql/src/test/results/clientpositive/llap/union17.q.out +++ b/ql/src/test/results/clientpositive/llap/union17.q.out @@ -176,31 +176,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -227,31 +231,35 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union18.q.out b/ql/src/test/results/clientpositive/llap/union18.q.out index 2538051311..0d26c6548f 100644 --- a/ql/src/test/results/clientpositive/llap/union18.q.out +++ b/ql/src/test/results/clientpositive/llap/union18.q.out @@ -95,16 +95,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -122,16 +122,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -159,16 +159,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -186,46 +186,54 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union19.q.out b/ql/src/test/results/clientpositive/llap/union19.q.out index 966a053370..f7c8fe87fa 100644 --- a/ql/src/test/results/clientpositive/llap/union19.q.out +++ b/ql/src/test/results/clientpositive/llap/union19.q.out @@ -114,16 +114,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -169,16 +169,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -205,46 +205,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union22.q.out b/ql/src/test/results/clientpositive/llap/union22.q.out index 7167f7d085..e51e9711ce 100644 --- a/ql/src/test/results/clientpositive/llap/union22.q.out +++ b/ql/src/test/results/clientpositive/llap/union22.q.out @@ -156,12 +156,12 @@ STAGE PLANS: outputColumnNames: k1, k2, k3, k4, ds Statistics: Num rows: 387 Data size: 141297 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(k2), compute_bit_vector(k2, 'hll'), max(length(k3)), avg(COALESCE(length(k3),0)), count(k3), compute_bit_vector(k3, 'hll'), max(length(k4)), avg(COALESCE(length(k4),0)), count(k4), compute_bit_vector(k4, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -169,9 +169,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1021 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary), _col14 (type: int), _col15 (type: struct), _col16 (type: bigint), _col17 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -286,12 +286,12 @@ STAGE PLANS: outputColumnNames: k1, k2, k3, k4, ds Statistics: Num rows: 387 Data size: 141297 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(k2), compute_bit_vector(k2, 'hll'), max(length(k3)), avg(COALESCE(length(k3),0)), count(k3), compute_bit_vector(k3, 'hll'), max(length(k4)), avg(COALESCE(length(k4),0)), count(k4), compute_bit_vector(k4, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -299,9 +299,9 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1021 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary), _col14 (type: int), _col15 (type: struct), _col16 (type: bigint), _col17 (type: binary) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -420,30 +420,30 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 749 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col14,0)) (type: bigint), COALESCE(_col15,0) (type: double), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git a/ql/src/test/results/clientpositive/llap/union25.q.out b/ql/src/test/results/clientpositive/llap/union25.q.out index 144582195d..3f63e76a62 100644 --- a/ql/src/test/results/clientpositive/llap/union25.q.out +++ b/ql/src/test/results/clientpositive/llap/union25.q.out @@ -188,31 +188,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 408 Data size: 75888 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/union28.q.out b/ql/src/test/results/clientpositive/llap/union28.q.out index 46505a54d8..2e1e015d53 100644 --- a/ql/src/test/results/clientpositive/llap/union28.q.out +++ b/ql/src/test/results/clientpositive/llap/union28.q.out @@ -79,16 +79,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -141,17 +141,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -177,16 +181,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -212,16 +216,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/union29.q.out b/ql/src/test/results/clientpositive/llap/union29.q.out index 09bba31bbe..cad6eedd4a 100644 --- a/ql/src/test/results/clientpositive/llap/union29.q.out +++ b/ql/src/test/results/clientpositive/llap/union29.q.out @@ -79,16 +79,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -117,16 +117,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -155,33 +155,37 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/union30.q.out b/ql/src/test/results/clientpositive/llap/union30.q.out index 37cb6d6764..8b77c68fbf 100644 --- a/ql/src/test/results/clientpositive/llap/union30.q.out +++ b/ql/src/test/results/clientpositive/llap/union30.q.out @@ -94,16 +94,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -132,16 +132,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 5 @@ -194,17 +194,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -230,16 +234,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -265,16 +269,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/union31.q.out b/ql/src/test/results/clientpositive/llap/union31.q.out index 48df6c38a7..9eaab44c2b 100644 --- a/ql/src/test/results/clientpositive/llap/union31.q.out +++ b/ql/src/test/results/clientpositive/llap/union31.q.out @@ -217,31 +217,35 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -268,31 +272,35 @@ STAGE PLANS: outputColumnNames: value, cnt Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -557,16 +565,16 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -590,46 +598,54 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -955,16 +971,16 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -988,46 +1004,54 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8333333 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union33.q.out b/ql/src/test/results/clientpositive/llap/union33.q.out index 8fd7573715..506b27b60b 100644 --- a/ql/src/test/results/clientpositive/llap/union33.q.out +++ b/ql/src/test/results/clientpositive/llap/union33.q.out @@ -71,16 +71,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map 4 @@ -112,17 +112,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -165,16 +169,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Union 2 Vertex: Union 2 @@ -322,16 +326,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -376,31 +380,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/union4.q.out b/ql/src/test/results/clientpositive/llap/union4.q.out index 6e412cb773..b0f35ca52f 100644 --- a/ql/src/test/results/clientpositive/llap/union4.q.out +++ b/ql/src/test/results/clientpositive/llap/union4.q.out @@ -107,31 +107,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -161,16 +165,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union6.q.out b/ql/src/test/results/clientpositive/llap/union6.q.out index 5a533b5267..0cae3cbe7f 100644 --- a/ql/src/test/results/clientpositive/llap/union6.q.out +++ b/ql/src/test/results/clientpositive/llap/union6.q.out @@ -82,16 +82,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -119,31 +119,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index 5a5396e10d..a017f807f8 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -181,31 +181,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized Reduce Operator Tree: @@ -488,31 +492,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized Reduce Operator Tree: @@ -1177,16 +1185,16 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 115000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -1213,46 +1221,54 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 125 Data size: 34500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -2096,16 +2112,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 69000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -2123,46 +2139,54 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 115000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -3016,16 +3040,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 125 Data size: 34500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -3043,46 +3067,54 @@ STAGE PLANS: outputColumnNames: key, val1, val2 Statistics: Num rows: 250 Data size: 115000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(val1)), avg(COALESCE(length(val1),0)), count(val1), compute_bit_vector(val1, 'hll'), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector(val2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -4181,12 +4213,12 @@ STAGE PLANS: outputColumnNames: k1, k2, k3, k4, ds Statistics: Num rows: 387 Data size: 141297 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + aggregations: max(length(k1)), avg(COALESCE(length(k1),0)), count(1), count(k1), compute_bit_vector(k1, 'hll'), max(length(k2)), avg(COALESCE(length(k2),0)), count(k2), compute_bit_vector(k2, 'hll'), max(length(k3)), avg(COALESCE(length(k3),0)), count(k3), compute_bit_vector(k3, 'hll'), max(length(k4)), avg(COALESCE(length(k4),0)), count(k4), compute_bit_vector(k4, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -4194,39 +4226,39 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1021 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary), _col14 (type: int), _col15 (type: struct), _col16 (type: bigint), _col17 (type: binary) auto parallelism: true Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 749 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col14,0)) (type: bigint), COALESCE(_col15,0) (type: double), (_col3 - _col16) (type: bigint), COALESCE(ndv_compute_bit_vector(_col17),0) (type: bigint), _col17 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 directory: hdfs://### HDFS PATH ### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1845 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1149 Basic stats: COMPLETE Column stats: COMPLETE Stats Publishing Key Prefix: hdfs://### HDFS PATH ### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: bucketing_version -1 - columns _col0,_col1,_col2,_col3,_col4 - columns.types struct:struct:struct:struct:string + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24 + columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -6780,31 +6812,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 187 Data size: 34782 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -8509,31 +8545,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized Reduce Operator Tree: @@ -8811,31 +8851,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -9511,31 +9555,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized Reduce Operator Tree: @@ -9835,16 +9883,16 @@ STAGE PLANS: outputColumnNames: key, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Select Operator expressions: _col1 (type: string) outputColumnNames: _col1 @@ -9867,17 +9915,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -9904,31 +9956,35 @@ STAGE PLANS: outputColumnNames: value, cnt Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(value)), avg(COALESCE(length(value),0)), count(1), count(value), compute_bit_vector(value, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -10212,16 +10268,16 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -10245,46 +10301,54 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Execution mode: vectorized Reduce Operator Tree: @@ -10604,16 +10668,16 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -10637,46 +10701,54 @@ STAGE PLANS: outputColumnNames: c1, cnt Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + aggregations: max(length(c1)), avg(COALESCE(length(c1),0)), count(1), count(c1), compute_bit_vector(c1, 'hll'), min(cnt), max(cnt), count(cnt), compute_bit_vector(cnt, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -11666,31 +11738,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized Reduce Operator Tree: @@ -11908,31 +11984,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_3.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_3.q.out index 2356da645f..6738afbb4d 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_3.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_3.q.out @@ -262,31 +262,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 13 Data size: 3536 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.9230769 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/union_lateralview.q.out b/ql/src/test/results/clientpositive/llap/union_lateralview.q.out index d9d4611d78..3d37dbe987 100644 --- a/ql/src/test/results/clientpositive/llap/union_lateralview.q.out +++ b/ql/src/test/results/clientpositive/llap/union_lateralview.q.out @@ -215,31 +215,35 @@ STAGE PLANS: outputColumnNames: key, arr_ele, value Statistics: Num rows: 3104 Data size: 294880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(arr_ele, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), min(arr_ele), max(arr_ele), count(arr_ele), compute_bit_vector(arr_ele, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/llap/union_top_level.q.out b/ql/src/test/results/clientpositive/llap/union_top_level.q.out index e0d86b3989..f846cb2fc1 100644 --- a/ql/src/test/results/clientpositive/llap/union_top_level.q.out +++ b/ql/src/test/results/clientpositive/llap/union_top_level.q.out @@ -554,31 +554,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -606,16 +610,16 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -643,16 +647,16 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Union 3 Vertex: Union 3 @@ -862,31 +866,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -914,16 +922,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -951,16 +959,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Union 3 Vertex: Union 3 @@ -1158,31 +1166,35 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1210,16 +1222,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -1247,16 +1259,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value), max(value), count(value), compute_bit_vector(value, 'hll') minReductionHashAggr: 0.8888889 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out b/ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out index 9e8d6a07cf..7667c9eb2a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out @@ -80,44 +80,48 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -254,44 +258,48 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 958 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: no inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 494 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out index 738ba89bb2..1667840056 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out @@ -719,7 +719,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Select Operator @@ -739,17 +739,21 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll') mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'DECIMAL' (type: string), _col0 (type: decimal(11,5)), _col1 (type: decimal(11,5)), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out index f8d6e2d069..34daf8169a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out @@ -167,46 +167,51 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueExpressions: StringLength(col 0:string) -> 1:int, VectorCoalesce(columns [2, 3])(children: StringLength(col 0:string) -> 2:int, ConstantVectorExpression(val 0) -> 3:int) -> 4:int Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), c1 (type: string) Reducer 4 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll') mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Reducer 5 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out index 2042f6d508..5317641422 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out @@ -167,46 +167,51 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueExpressions: StringLength(col 0:string) -> 1:int, VectorCoalesce(columns [2, 3])(children: StringLength(col 0:string) -> 2:int, ConstantVectorExpression(val 0) -> 3:int) -> 4:int Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) + value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), c1 (type: string) Reducer 4 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') + aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll') mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary) Reducer 5 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 96c83be6b2..5f98623aec 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -1142,7 +1142,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator @@ -1169,36 +1169,40 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(key2), compute_bit_vector(key2, 'hll'), min(val), max(val), count(val), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 4 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -1248,7 +1252,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator @@ -1275,36 +1279,40 @@ STAGE PLANS: outputColumnNames: key1, key2, val Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + aggregations: max(length(key1)), avg(COALESCE(length(key1),0)), count(1), count(key1), compute_bit_vector(key1, 'hll'), max(length(key2)), avg(COALESCE(length(key2),0)), count(key2), compute_bit_vector(key2, 'hll'), min(val), max(val), count(val), compute_bit_vector(val, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 7 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index beb3eb503d..bcf8a68a7f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -184,7 +184,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_01_n0 POSTHOOK: Output: default@e011_01_n0 #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_02_n0 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_02_n0 @@ -195,7 +195,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_02_n0 POSTHOOK: Output: default@e011_02_n0 #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_03_n0 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_03_n0 @@ -206,7 +206,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_03_n0 POSTHOOK: Output: default@e011_03_n0 #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_01_small @@ -217,7 +217,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_01_small POSTHOOK: Output: default@e011_01_small #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_02_small @@ -228,7 +228,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_02_small POSTHOOK: Output: default@e011_02_small #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@e011_03_small @@ -239,7 +239,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@e011_03_small POSTHOOK: Output: default@e011_03_small #### A masked pattern was here #### -_c0 _c1 +columntype0 min0 max0 countnulls0 numdistinctvalues0 ndvbitvector0 columntype1 min1 max1 countnulls1 numdistinctvalues1 ndvbitvector1 PREHOOK: query: explain vectorization detail select sum(sum(c1)) over() from e011_01_n0 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out index e1a10916e5..ba31832201 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -5698,27 +5698,31 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'LONG' (type: string), UDFToLong(_col21) (type: bigint), UDFToLong(_col22) (type: bigint), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1588 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1588 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 2 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5776,36 +5780,40 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, r, dr, s Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(1), count(p_mfgr), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(p_name), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(p_size), compute_bit_vector(p_size, 'hll'), min(r), max(r), count(r), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(dr), compute_bit_vector(dr, 'hll'), min(s), max(s), count(s), compute_bit_vector(s, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: double), _col22 (type: double), _col23 (type: bigint), _col24 (type: binary) Reducer 3 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Vectorization: @@ -5956,36 +5964,40 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, r, dr, cud, s2, fv1 Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(s2, 'hll'), compute_stats(fv1, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(1), count(p_mfgr), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(p_name), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(p_size), compute_bit_vector(p_size, 'hll'), min(r), max(r), count(r), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(dr), compute_bit_vector(dr, 'hll'), min(cud), max(cud), count(cud), compute_bit_vector(cud, 'hll'), min(s2), max(s2), count(s2), compute_bit_vector(s2, 'hll'), min(fv1), max(fv1), count(fv1), compute_bit_vector(fv1, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: int), _col23 (type: bigint), _col24 (type: binary), _col25 (type: double), _col26 (type: double), _col27 (type: bigint), _col28 (type: binary), _col29 (type: int), _col30 (type: int), _col31 (type: bigint), _col32 (type: binary) Reducer 7 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24), min(VALUE._col25), max(VALUE._col26), count(VALUE._col27), compute_bit_vector(VALUE._col28), min(VALUE._col29), max(VALUE._col30), count(VALUE._col31), compute_bit_vector(VALUE._col32) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'LONG' (type: string), UDFToLong(_col21) (type: bigint), UDFToLong(_col22) (type: bigint), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'LONG' (type: string), UDFToLong(_col29) (type: bigint), UDFToLong(_col30) (type: bigint), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47 + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Execution mode: vectorized, llap Reduce Vectorization: @@ -6120,16 +6132,16 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, c, ca, fv Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(c, 'hll'), compute_stats(ca, 'hll'), compute_stats(fv, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(1), count(p_mfgr), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(p_name), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(p_size), compute_bit_vector(p_size, 'hll'), min(c), max(c), count(c), compute_bit_vector(c, 'hll'), min(ca), max(ca), count(ca), compute_bit_vector(ca, 'hll'), min(fv), max(fv), count(fv), compute_bit_vector(fv, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: int), _col23 (type: bigint), _col24 (type: binary) Stage: Stage-4 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out index 314300f371..bf0f28e06a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out @@ -897,7 +897,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Select Operator @@ -945,36 +945,40 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 4096 Data size: 16396 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(col2), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(col3), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary) Reducer 3 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DOUBLE' (type: string), _col5 (type: double), _col6 (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index d7f8f73b9b..56c84c7006 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -109,7 +109,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator @@ -134,36 +134,40 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 500882866c..1b9ae115a8 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -4213,7 +4213,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4271,36 +4271,40 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, r, dr, s Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(1), count(p_mfgr), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(p_name), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(p_size), compute_bit_vector(p_size, 'hll'), min(r), max(r), count(r), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(dr), compute_bit_vector(dr, 'hll'), min(s), max(s), count(s), compute_bit_vector(s, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: double), _col22 (type: double), _col23 (type: bigint), _col24 (type: binary) Reducer 4 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: llap Reduce Vectorization: @@ -4414,36 +4418,40 @@ STAGE PLANS: outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(s2, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(fv1, 'hll') + aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(1), count(p_mfgr), compute_bit_vector(p_mfgr, 'hll'), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(p_name), compute_bit_vector(p_name, 'hll'), min(p_size), max(p_size), count(p_size), compute_bit_vector(p_size, 'hll'), min(s2), max(s2), count(s2), compute_bit_vector(s2, 'hll'), min(r), max(r), count(r), compute_bit_vector(r, 'hll'), min(dr), max(dr), count(dr), compute_bit_vector(dr, 'hll'), min(cud), max(cud), count(cud), compute_bit_vector(cud, 'hll'), min(fv1), max(fv1), count(fv1), compute_bit_vector(fv1, 'hll') minReductionHashAggr: 0.96153843 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: int), _col23 (type: bigint), _col24 (type: binary), _col25 (type: double), _col26 (type: double), _col27 (type: bigint), _col28 (type: binary), _col29 (type: int), _col30 (type: int), _col31 (type: bigint), _col32 (type: binary) Reducer 7 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported vectorized: false Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector(VALUE._col24), min(VALUE._col25), max(VALUE._col26), count(VALUE._col27), compute_bit_vector(VALUE._col28), min(VALUE._col29), max(VALUE._col30), count(VALUE._col31), compute_bit_vector(VALUE._col32) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'LONG' (type: string), UDFToLong(_col21) (type: bigint), UDFToLong(_col22) (type: bigint), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'LONG' (type: string), UDFToLong(_col29) (type: bigint), UDFToLong(_col30) (type: bigint), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47 + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/nonmr_fetch.q.out b/ql/src/test/results/clientpositive/nonmr_fetch.q.out index 9e2c40d157..7592a73ec8 100644 --- a/ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ b/ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -1351,11 +1351,11 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1392,22 +1392,26 @@ STAGE PLANS: Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: explain analyze table src compute statistics PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/nullformatCTAS.q.out b/ql/src/test/results/clientpositive/nullformatCTAS.q.out index 792abe701a..7d3d5eb5b4 100644 --- a/ql/src/test/results/clientpositive/nullformatCTAS.q.out +++ b/ql/src/test/results/clientpositive/nullformatCTAS.q.out @@ -81,29 +81,33 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(COALESCE(length(col1),0)), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 840 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/smb_mapjoin9.q.out b/ql/src/test/results/clientpositive/smb_mapjoin9.q.out index a5588bc8b8..b410feaf2c 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin9.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin9.q.out @@ -299,29 +299,33 @@ STAGE PLANS: outputColumnNames: col1, col2, col3, col4 Statistics: Num rows: 550 Data size: 2200 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll') + aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector(col2, 'hll'), max(length(col3)), avg(COALESCE(length(col3),0)), count(col3), compute_bit_vector(col3, 'hll'), min(col4), max(col4), count(col4), compute_bit_vector(col4, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector(VALUE._col16) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out index 0a8355d03f..424614adaa 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out @@ -88,12 +88,12 @@ STAGE PLANS: outputColumnNames: key, value1, value2, ds Statistics: Num rows: 500 Data size: 225500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -130,23 +130,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -266,12 +266,12 @@ STAGE PLANS: outputColumnNames: key, value1, value2, ds Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), min(value1), max(value1), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1389 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 717 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -308,23 +308,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1389 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 717 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 581 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 881 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1491,12 +1491,12 @@ STAGE PLANS: outputColumnNames: key, value1, value2, ds Statistics: Num rows: 500 Data size: 225500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value1)), avg(COALESCE(length(value1),0)), count(value1), compute_bit_vector(value1, 'hll'), max(length(value2)), avg(COALESCE(length(value2),0)), count(value2), compute_bit_vector(value2, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1533,23 +1533,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 789 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: struct), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector(VALUE._col12) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col10,0)) (type: bigint), COALESCE(_col11,0) (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out index f5eef92280..57e81f9cb1 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out @@ -167,12 +167,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -209,23 +209,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -306,12 +306,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -348,23 +348,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -446,12 +446,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -488,23 +488,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -586,12 +586,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -628,23 +628,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -726,12 +726,12 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll') keys: ds (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -768,23 +768,23 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: struct), _col8 (type: bigint), _col9 (type: binary) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 417 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col6,0)) (type: bigint), COALESCE(_col7,0) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out index 38da086cac..22369c8d82 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out @@ -102,19 +102,21 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_11] - Group By Operator [GBY_9] (rows=1/1 width=440) - Output:["_col0"],aggregations:["compute_stats(VALUE._col0, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_2] - table:{"name:":"default.t_n28"} - Select Operator [SEL_1] (rows=500/500 width=87) - Output:["_col0"] - TableScan [TS_0] (rows=500/500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - PARTITION_ONLY_SHUFFLE [RS_8] - Select Operator [SEL_7] (rows=500/500 width=87) - Output:["col1"] - Please refer to the previous Select Operator [SEL_1] + Select Operator [SEL_10] (rows=1/1 width=266) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Group By Operator [GBY_9] (rows=1/1 width=172) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.t_n28"} + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500/500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + PARTITION_ONLY_SHUFFLE [RS_8] + Select Operator [SEL_7] (rows=500/500 width=87) + Output:["col1"] + Please refer to the previous Select Operator [SEL_1] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -161,19 +163,21 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_8] - Group By Operator [GBY_6] (rows=1/1 width=440) - Output:["_col0"],aggregations:["compute_stats(VALUE._col0, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_2] - table:{"name:":"default.t_n28"} - Select Operator [SEL_1] (rows=500/500 width=87) - Output:["_col0"] - TableScan [TS_0] (rows=500/500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - PARTITION_ONLY_SHUFFLE [RS_5] - Select Operator [SEL_4] (rows=500/500 width=87) - Output:["key"] - Please refer to the previous Select Operator [SEL_1] + Select Operator [SEL_7] (rows=1/1 width=266) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Group By Operator [GBY_6] (rows=1/1 width=172) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.t_n28"} + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500/500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + PARTITION_ONLY_SHUFFLE [RS_5] + Select Operator [SEL_4] (rows=500/500 width=87) + Output:["key"] + Please refer to the previous Select Operator [SEL_1] PREHOOK: query: select key from src limit 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index 5af43def9c..33546dd74f 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -298,14 +298,16 @@ Stage-2 Stage-0 Reducer 2 File Output Operator [FS_5] - Group By Operator [GBY_3] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=178) - Output:["key","value"] - TableScan [TS_0] (rows=500/500 width=178) - default@src_stats,src_stats,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Select Operator [SEL_4] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_3] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=178) + Output:["key","value"] + TableScan [TS_0] (rows=500/500 width=178) + default@src_stats,src_stats,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x)) PREHOOK: type: CREATEMACRO @@ -412,19 +414,21 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_11] - Group By Operator [GBY_9] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - File Output Operator [FS_13] - table:{"name:":"default.src_autho_test_n4"} - Select Operator [SEL_12] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500/500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_15] - Select Operator [SEL_14] (rows=500/500 width=178) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_12] + Select Operator [SEL_10] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_9] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + File Output Operator [FS_13] + table:{"name:":"default.src_autho_test_n4"} + Select Operator [SEL_12] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500/500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_15] + Select Operator [SEL_14] (rows=500/500 width=178) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_12] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -769,21 +773,23 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_9] - Group By Operator [GBY_7] (rows=1/1 width=2824) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - File Output Operator [FS_14] - table:{"name:":"default.orc_merge5_n1"} - Select Operator [SEL_13] (rows=1/3 width=352) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_12] (rows=1/3 width=352) - predicate:(userid <= 13L) - TableScan [TS_0] (rows=1/15000 width=352) - default@orc_merge5_n1,orc_merge5_n1,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] - PARTITION_ONLY_SHUFFLE [RS_16] - Select Operator [SEL_15] (rows=1/3 width=352) - Output:["userid","string1","subtype","decimal1","ts"] - Please refer to the previous Select Operator [SEL_13] + Select Operator [SEL_8] (rows=1/1 width=1468) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] + Group By Operator [GBY_7] (rows=1/1 width=1468) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"],aggregations:["min(VALUE._col0)","max(VALUE._col0)","count(1)","count(VALUE._col0)","compute_bit_vector(VALUE._col0, 'hll')","max(VALUE._col3)","avg(VALUE._col4)","count(VALUE._col5)","compute_bit_vector(VALUE._col5, 'hll')","min(VALUE._col6)","max(VALUE._col6)","count(VALUE._col6)","compute_bit_vector(VALUE._col6, 'hll')","min(VALUE._col7)","max(VALUE._col7)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')","min(VALUE._col8)","max(VALUE._col8)","count(VALUE._col8)","compute_bit_vector(VALUE._col8, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + File Output Operator [FS_14] + table:{"name:":"default.orc_merge5_n1"} + Select Operator [SEL_13] (rows=1/3 width=352) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_12] (rows=1/3 width=352) + predicate:(userid <= 13L) + TableScan [TS_0] (rows=1/15000 width=352) + default@orc_merge5_n1,orc_merge5_n1,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + PARTITION_ONLY_SHUFFLE [RS_16] + Select Operator [SEL_15] (rows=1/3 width=352) + Output:["userid","string1","subtype","decimal1","ts"] + Please refer to the previous Select Operator [SEL_13] Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out index 5088a3d155..63cd2c0137 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out @@ -64,14 +64,16 @@ Stage-2 Stage-0 Reducer 2 File Output Operator [FS_5] - Group By Operator [GBY_3] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=178) - Output:["key","value"] - TableScan [TS_0] (rows=500/500 width=178) - default@src_stats_n0,src_stats_n0,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Select Operator [SEL_4] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_3] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=178) + Output:["key","value"] + TableScan [TS_0] (rows=500/500 width=178) + default@src_stats_n0,src_stats_n0,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: drop table src_multi2_n7 PREHOOK: type: DROPTABLE @@ -124,54 +126,56 @@ Stage-3 Stage-1 Reducer 5 File Output Operator [FS_25] - Group By Operator [GBY_23] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_19] - table:{"name:":"default.src_multi2_n7"} - Select Operator [SEL_18] (rows=830/508 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_43] (rows=830/508 width=178) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_12] (rows=500/500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=525/319 width=87) - Output:["_col0"] - Group By Operator [GBY_10] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_48] - PartitionCols:_col0, _col1 - Select Operator [SEL_46] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_45] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_44] (rows=500/500 width=178) - Output:["key","value"] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_53] - PartitionCols:_col0, _col1 - Select Operator [SEL_51] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_50] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_49] (rows=25/25 width=175) - Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_22] - Select Operator [SEL_21] (rows=830/508 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_18] + Select Operator [SEL_24] (rows=1/1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_23] (rows=1/1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(1)","count(VALUE._col3)","compute_bit_vector(VALUE._col3, 'hll')","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col7, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_19] + table:{"name:":"default.src_multi2_n7"} + Select Operator [SEL_18] (rows=830/508 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_43] (rows=830/508 width=178) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col0","_col2"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_31] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_12] (rows=500/500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=525/319 width=87) + Output:["_col0"] + Group By Operator [GBY_10] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_48] + PartitionCols:_col0, _col1 + Select Operator [SEL_46] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_45] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_44] (rows=500/500 width=178) + Output:["key","value"] + <-Map 6 [CONTAINS] + Reduce Output Operator [RS_53] + PartitionCols:_col0, _col1 + Select Operator [SEL_51] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_50] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_49] (rows=25/25 width=175) + Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_22] + Select Operator [SEL_21] (rows=830/508 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_18] PREHOOK: query: select count(*) from (select * from src union select * from src1)subq PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index d886a26f78..f1c245b671 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -254,16 +254,18 @@ Stage-2 Stage-0 Reducer 2 File Output Operator [FS_6] - Group By Operator [GBY_4] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_1] (rows=500 width=178) - Output:["key","value"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Select Operator [SEL_5] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_4] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector(key, 'hll')","max(length(value))","avg(COALESCE(length(value),0))","count(value)","compute_bit_vector(value, 'hll')"] + Select Operator [SEL_1] (rows=500 width=178) + Output:["key","value"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x)) @@ -340,21 +342,23 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_12] - Group By Operator [GBY_10] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_2] - table:{"name:":"default.src_autho_test_n3"} - Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_9] - Group By Operator [GBY_8] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"] - Select Operator [SEL_7] (rows=500 width=178) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_1] + Select Operator [SEL_11] (rows=1 width=532) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_10] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.src_autho_test_n3"} + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_9] + Group By Operator [GBY_8] (rows=1 width=472) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(1)","count(col1)","compute_bit_vector(col1, 'hll')","max(length(col2))","avg(COALESCE(length(col2),0))","count(col2)","compute_bit_vector(col2, 'hll')"] + Select Operator [SEL_7] (rows=500 width=178) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_1] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -617,23 +621,25 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_10] - Group By Operator [GBY_8] (rows=1 width=2824) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - File Output Operator [FS_3] - table:{"name:":"default.orc_merge5_n0"} - Select Operator [SEL_2] (rows=1 width=352) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_11] (rows=1 width=352) - predicate:(userid <= 13L) - TableScan [TS_0] (rows=1 width=352) - default@orc_merge5_n0,orc_merge5_n0,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] - PARTITION_ONLY_SHUFFLE [RS_7] - Group By Operator [GBY_6] (rows=1 width=2760) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(userid, 'hll')","compute_stats(string1, 'hll')","compute_stats(subtype, 'hll')","compute_stats(decimal1, 'hll')","compute_stats(ts, 'hll')"] - Select Operator [SEL_5] (rows=1 width=352) - Output:["userid","string1","subtype","decimal1","ts"] - Please refer to the previous Select Operator [SEL_2] + Select Operator [SEL_9] (rows=1 width=1536) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] + Group By Operator [GBY_8] (rows=1 width=1536) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector(VALUE._col12)","min(VALUE._col13)","max(VALUE._col14)","count(VALUE._col15)","compute_bit_vector(VALUE._col16)","min(VALUE._col17)","max(VALUE._col18)","count(VALUE._col19)","compute_bit_vector(VALUE._col20)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_3] + table:{"name:":"default.orc_merge5_n0"} + Select Operator [SEL_2] (rows=1 width=352) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_11] (rows=1 width=352) + predicate:(userid <= 13L) + TableScan [TS_0] (rows=1 width=352) + default@orc_merge5_n0,orc_merge5_n0,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + PARTITION_ONLY_SHUFFLE [RS_7] + Group By Operator [GBY_6] (rows=1 width=1536) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"],aggregations:["min(userid)","max(userid)","count(1)","count(userid)","compute_bit_vector(userid, 'hll')","max(length(string1))","avg(COALESCE(length(string1),0))","count(string1)","compute_bit_vector(string1, 'hll')","min(subtype)","max(subtype)","count(subtype)","compute_bit_vector(subtype, 'hll')","min(decimal1)","max(decimal1)","count(decimal1)","compute_bit_vector(decimal1, 'hll')","min(ts)","max(ts)","count(ts)","compute_bit_vector(ts, 'hll')"] + Select Operator [SEL_5] (rows=1 width=352) + Output:["userid","string1","subtype","decimal1","ts"] + Please refer to the previous Select Operator [SEL_2] Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) diff --git a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out index cfe4481326..4d9990a7ff 100644 --- a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out @@ -42,66 +42,68 @@ Stage-3 Stage-1 Reducer 3 File Output Operator [FS_23] - Group By Operator [GBY_21] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Union 2 [CUSTOM_SIMPLE_EDGE] - <-Map 1 [CONTAINS] - File Output Operator [FS_31] - table:{"name:":"default.x"} - Select Operator [SEL_30] (rows=6 width=91) - Output:["_col0","_col1"] - Select Operator [SEL_28] (rows=2 width=91) - Output:["_col1"] - Filter Operator [FIL_27] (rows=2 width=87) - predicate:(key = '238') - TableScan [TS_26] (rows=500 width=87) - Output:["key"] - Reduce Output Operator [RS_34] - Group By Operator [GBY_33] (rows=1 width=864) - Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"] - Select Operator [SEL_32] (rows=6 width=91) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_30] - <-Map 4 [CONTAINS] - File Output Operator [FS_45] - table:{"name:":"default.x"} - Select Operator [SEL_44] (rows=6 width=91) - Output:["_col0","_col1"] - Select Operator [SEL_42] (rows=4 width=87) - Output:["_col1"] - Lateral View Join Operator [LVJ_40] (rows=4 width=239) - Output:["_col5"] - Select Operator [SEL_38] (rows=2 width=431) - Lateral View Forward [LVF_37] (rows=2 width=86) - Filter Operator [FIL_36] (rows=2 width=86) - predicate:(key = '238') - TableScan [TS_35] (rows=25 width=86) - Output:["key"] - Reduce Output Operator [RS_48] - Group By Operator [GBY_47] (rows=1 width=864) - Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"] - Select Operator [SEL_46] (rows=6 width=91) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_44] - File Output Operator [FS_45] - table:{"name:":"default.x"} - Select Operator [SEL_44] (rows=6 width=91) - Output:["_col0","_col1"] - Select Operator [SEL_42] (rows=4 width=87) - Output:["_col1"] - Lateral View Join Operator [LVJ_40] (rows=4 width=239) - Output:["_col5"] - UDTF Operator [UDTF_41] (rows=2 width=48) - function name:explode - Select Operator [SEL_39] (rows=2 width=48) - Output:["_col0"] - Please refer to the previous Lateral View Forward [LVF_37] - Reduce Output Operator [RS_48] - Group By Operator [GBY_47] (rows=1 width=864) - Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"] - Select Operator [SEL_46] (rows=6 width=91) - Output:["col1","col2"] - Please refer to the previous Select Operator [SEL_44] + Select Operator [SEL_22] (rows=1 width=530) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_21] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector(VALUE._col4)","min(VALUE._col5)","max(VALUE._col6)","count(VALUE._col7)","compute_bit_vector(VALUE._col8)"] + <-Union 2 [CUSTOM_SIMPLE_EDGE] + <-Map 1 [CONTAINS] + File Output Operator [FS_31] + table:{"name:":"default.x"} + Select Operator [SEL_30] (rows=6 width=91) + Output:["_col0","_col1"] + Select Operator [SEL_28] (rows=2 width=91) + Output:["_col1"] + Filter Operator [FIL_27] (rows=2 width=87) + predicate:(key = '238') + TableScan [TS_26] (rows=500 width=87) + Output:["key"] + Reduce Output Operator [RS_34] + Group By Operator [GBY_33] (rows=1 width=400) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(1)","count(col1)","compute_bit_vector(col1, 'hll')","min(col2)","max(col2)","count(col2)","compute_bit_vector(col2, 'hll')"] + Select Operator [SEL_32] (rows=6 width=91) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_30] + <-Map 4 [CONTAINS] + File Output Operator [FS_45] + table:{"name:":"default.x"} + Select Operator [SEL_44] (rows=6 width=91) + Output:["_col0","_col1"] + Select Operator [SEL_42] (rows=4 width=87) + Output:["_col1"] + Lateral View Join Operator [LVJ_40] (rows=4 width=239) + Output:["_col5"] + Select Operator [SEL_38] (rows=2 width=431) + Lateral View Forward [LVF_37] (rows=2 width=86) + Filter Operator [FIL_36] (rows=2 width=86) + predicate:(key = '238') + TableScan [TS_35] (rows=25 width=86) + Output:["key"] + Reduce Output Operator [RS_48] + Group By Operator [GBY_47] (rows=1 width=400) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(1)","count(col1)","compute_bit_vector(col1, 'hll')","min(col2)","max(col2)","count(col2)","compute_bit_vector(col2, 'hll')"] + Select Operator [SEL_46] (rows=6 width=91) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_45] + table:{"name:":"default.x"} + Select Operator [SEL_44] (rows=6 width=91) + Output:["_col0","_col1"] + Select Operator [SEL_42] (rows=4 width=87) + Output:["_col1"] + Lateral View Join Operator [LVJ_40] (rows=4 width=239) + Output:["_col5"] + UDTF Operator [UDTF_41] (rows=2 width=48) + function name:explode + Select Operator [SEL_39] (rows=2 width=48) + Output:["_col0"] + Please refer to the previous Lateral View Forward [LVF_37] + Reduce Output Operator [RS_48] + Group By Operator [GBY_47] (rows=1 width=400) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(1)","count(col1)","compute_bit_vector(col1, 'hll')","min(col2)","max(col2)","count(col2)","compute_bit_vector(col2, 'hll')"] + Select Operator [SEL_46] (rows=6 width=91) + Output:["col1","col2"] + Please refer to the previous Select Operator [SEL_44] Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6)