diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index e494f8e..669d0ea 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -46,8 +46,7 @@ minitez.query.files.shared=delete_orig_table.q,\ update_orig_table.q,\ vector_join_part_col_char.q,\ vector_non_string_partition.q,\ - vectorization_div0.q,\ - vectorization_limit.q + vectorization_div0.q # NOTE: Add tests to minitez only if it is very # specific to tez and cannot be added to minillap. @@ -759,7 +758,11 @@ minillaplocal.query.files=\ vector_like_2.q,\ vector_llap_text_1.q,\ vector_mapjoin_reduce.q,\ + vector_null_map.q,\ vector_number_compare_projection.q,\ + vector_orc_merge_incompat_schema.q,\ + vector_orc_nested_column_pruning.q,\ + vector_orc_null_check.q,\ vector_order_null.q,\ vector_outer_reference_windowed.q,\ vector_partitioned_date_time.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index bd594e6..55dc461 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; @@ -61,10 +62,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.io.Text; import org.apache.hive.common.util.DateUtils; @@ -1431,11 +1434,13 @@ private static VectorExpressionWriter genVectorExpressionWritableList( SettableListObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterList() { + private Object obj; private VectorExtractRow vectorExtractRow; private ListTypeInfo listTypeInfo; public VectorExpressionWriter init(SettableListObjectInspector objInspector) throws HiveException { super.init(objInspector); + obj = initValue(null); vectorExtractRow = new VectorExtractRow(); listTypeInfo = (ListTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(objInspector.getTypeName()); @@ -1450,24 +1455,43 @@ public Object initValue(Object ignored) { @Override public Object writeValue(ColumnVector column, int row) throws HiveException { - return setValue(null, column, row); + final ListColumnVector listColVector = (ListColumnVector) column; + final SettableListObjectInspector listOI = + (SettableListObjectInspector) this.objectInspector; + final List value = (List) vectorExtractRow.extractRowColumn(listColVector, + listTypeInfo, listOI, row); + if (value == null) { + return null; + } + + listOI.resize(obj, value.size()); + for (int i = 0; i < value.size(); i++) { + listOI.set(obj, i, 
value.get(i)); + } + return obj; } @Override - public Object setValue(Object row, ColumnVector column, int columnRow) + public Object setValue(Object list, ColumnVector column, int row) throws HiveException { + if (list == null) { + list = initValue(null); + } + final ListColumnVector listColVector = (ListColumnVector) column; final SettableListObjectInspector listOI = (SettableListObjectInspector) this.objectInspector; - final List value = (List)vectorExtractRow.extractRowColumn(listColVector, - listTypeInfo, listOI, columnRow); - if (null == row) { - row = ((SettableListObjectInspector) this.objectInspector).create(value.size()); + final List value = (List) vectorExtractRow.extractRowColumn(listColVector, + listTypeInfo, listOI, row); + if (value == null) { + return null; } + + listOI.resize(list, value.size()); for (int i = 0; i < value.size(); i++) { - listOI.set(row, i, value.get(i)); + listOI.set(list, i, value.get(i)); } - return row; + return list; } }.init(fieldObjInspector); @@ -1478,10 +1502,15 @@ private static VectorExpressionWriter genVectorExpressionWritableMap( return new VectorExpressionWriterMap() { private Object obj; + private VectorExtractRow vectorExtractRow; + private MapTypeInfo mapTypeInfo; public VectorExpressionWriter init(SettableMapObjectInspector objInspector) throws HiveException { super.init(objInspector); obj = initValue(null); + vectorExtractRow = new VectorExtractRow(); + mapTypeInfo = (MapTypeInfo) + TypeInfoUtils.getTypeInfoFromTypeString(objInspector.getTypeName()); return this; } @@ -1493,13 +1522,45 @@ public Object initValue(Object ignored) { @Override public Object writeValue(ColumnVector column, int row) throws HiveException { - throw new HiveException("Not implemented yet"); + final MapColumnVector mapColVector = (MapColumnVector) column; + final SettableMapObjectInspector mapOI = + (SettableMapObjectInspector) this.objectInspector; + final Map value = + (Map) vectorExtractRow.extractRowColumn( + mapColVector, mapTypeInfo, mapOI, row); + if (value == null) { + return null; + } + + mapOI.clear(obj); + for (Map.Entry entry : value.entrySet()) { + mapOI.put(obj, entry.getKey(), entry.getValue()); + } + return obj; } @Override - public Object setValue(Object row, ColumnVector column, int columnRow) + public Object setValue(Object map, ColumnVector column, int row) throws HiveException { - throw new HiveException("Not implemented yet"); + if (map == null) { + map = initValue(null); + } + + final MapColumnVector mapColVector = (MapColumnVector) column; + final SettableMapObjectInspector mapOI = + (SettableMapObjectInspector) this.objectInspector; + final Map value = + (Map) vectorExtractRow.extractRowColumn( + mapColVector, mapTypeInfo, mapOI, row); + if (value == null) { + return null; + } + + mapOI.clear(map); + for (Map.Entry entry : value.entrySet()) { + mapOI.put(map, entry.getKey(), entry.getValue()); + } + return map; } }.init(fieldObjInspector); } @@ -1546,9 +1607,9 @@ public Object writeValue(ColumnVector column, int row) throws HiveException { } @Override - public Object setValue(Object field, ColumnVector column, int row) throws HiveException { - if (null == field) { - field = initValue(null); + public Object setValue(Object struct, ColumnVector column, int row) throws HiveException { + if (struct == null) { + struct = initValue(null); } final StructColumnVector structColVector = (StructColumnVector) column; @@ -1562,9 +1623,9 @@ public Object setValue(Object field, ColumnVector column, int row) throws HiveEx final StructField structField = 
fields.get(i);
           final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i],
               fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row);
-          structOI.setStructFieldData(obj, structField, value);
+          structOI.setStructFieldData(struct, structField, value);
         }
-        return field;
+        return struct;
       }
     }.init(fieldObjInspector);
   }
@@ -1574,10 +1635,15 @@ private static VectorExpressionWriter genVectorExpressionWritableUnion(
     return new VectorExpressionWriterMap() {
       private Object obj;
+      private VectorExtractRow vectorExtractRow;
+      private UnionTypeInfo unionTypeInfo;
 
       public VectorExpressionWriter init(SettableUnionObjectInspector objInspector)
           throws HiveException {
         super.init(objInspector);
         obj = initValue(null);
+        vectorExtractRow = new VectorExtractRow();
+        unionTypeInfo = (UnionTypeInfo)
+            TypeInfoUtils.getTypeInfoFromTypeString(objInspector.getTypeName());
         return this;
       }
 
@@ -1589,13 +1655,46 @@ public Object initValue(Object ignored) {
 
       @Override
       public Object writeValue(ColumnVector column, int row) throws HiveException {
-        throw new HiveException("Not implemented yet");
+
+        final UnionColumnVector unionColumnVector = (UnionColumnVector) column;
+        final int tag = unionColumnVector.tags[row];
+        final SettableUnionObjectInspector unionOI =
+            (SettableUnionObjectInspector) this.objectInspector;
+        ObjectInspector fieldOI = unionOI.getObjectInspectors().get(tag);
+        ColumnVector fieldColVector = unionColumnVector.fields[tag];
+        final Object value =
+            vectorExtractRow.extractRowColumn(
+                fieldColVector, unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), fieldOI, row);
+        if (value == null) {
+          return null;
+        }
+
+        unionOI.setFieldAndTag(obj, value, (byte) tag);
+        return obj;
       }
 
       @Override
-      public Object setValue(Object row, ColumnVector column, int columnRow)
+      public Object setValue(Object union, ColumnVector column, int row)
           throws HiveException {
-        throw new HiveException("Not implemented yet");
+        if (union == null) {
+          union = initValue(null);
+        }
+
+        final UnionColumnVector unionColumnVector = (UnionColumnVector) column;
+        final int tag = unionColumnVector.tags[row];
+        final SettableUnionObjectInspector unionOI =
+            (SettableUnionObjectInspector) this.objectInspector;
+        ObjectInspector fieldOI = unionOI.getObjectInspectors().get(tag);
+        ColumnVector fieldColVector = unionColumnVector.fields[tag];
+        final Object value =
+            vectorExtractRow.extractRowColumn(
+                fieldColVector, unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), fieldOI, row);
+        if (value == null) {
+          return null;
+        }
+
+        unionOI.setFieldAndTag(union, value, (byte) tag);
+        return union;
      }
    }.init(fieldObjInspector);
  }
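Context for the writers above: before this change, writeValue()/setValue() on the map and union writers simply threw HiveException("Not implemented yet"), and the list writer allocated a fresh object on every writeValue() call and could NPE on a NULL list (it called value.size() before any null check). A minimal HiveQL sketch of the kind of query that exercises the new map path — the table name and data here are hypothetical; the new vector_null_map.q below covers the same path through VectorUDFAdaptor:

    SET hive.vectorized.execution.enabled=true;
    set hive.fetch.task.conversion=none;

    -- hypothetical one-row table with a map column
    create table map_demo (id int, m map<string, int>) stored as orc;
    insert into map_demo select 1, map('a', 1) from (select 1) t;

    -- map_keys() runs through VectorUDFAdaptor, which relies on these writers
    -- to materialize each MapColumnVector row; NULL rows must come back as null
    select id, map_keys(m) from map_demo;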
diff --git ql/src/test/queries/clientpositive/nullMap.q ql/src/test/queries/clientpositive/null_map.q
similarity index 85%
rename from ql/src/test/queries/clientpositive/nullMap.q
rename to ql/src/test/queries/clientpositive/null_map.q
index d2784b7..f272bb9 100644
--- ql/src/test/queries/clientpositive/nullMap.q
+++ ql/src/test/queries/clientpositive/null_map.q
@@ -1,3 +1,5 @@
+SET hive.vectorized.execution.enabled=false;
+
 create table map_txt (
   id int,
   content map<int, string>
diff --git ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q
index 098b41e..2396194 100644
--- ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q
+++ ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q
@@ -1,3 +1,5 @@
+SET hive.vectorized.execution.enabled=false;
+
 set hive.metastore.disallow.incompatible.col.type.changes=false;
 
 CREATE TABLE orc_create_staging (
diff --git ql/src/test/queries/clientpositive/orc_nested_column_pruning.q ql/src/test/queries/clientpositive/orc_nested_column_pruning.q
new file mode 100644
index 0000000..700fdd4
--- /dev/null
+++ ql/src/test/queries/clientpositive/orc_nested_column_pruning.q
@@ -0,0 +1,214 @@
+SET hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion = none;
+set hive.exec.dynamic.partition.mode = nonstrict;
+set hive.strict.checks.cartesian.product=false;
+
+-- First, create source tables
+DROP TABLE IF EXISTS dummy;
+CREATE TABLE dummy (i int);
+INSERT INTO TABLE dummy VALUES (42);
+
+DROP TABLE IF EXISTS nested_tbl_1;
+CREATE TABLE nested_tbl_1 (
+  a int,
+  s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>,
+  s2 struct<f7: string, f8: struct<f9: boolean, f10: array<int>, f11: map<string, boolean>>>,
+  s3 struct<f12: array<struct<f13: string, f14: int>>>,
+  s4 map<string, struct<f15: int>>,
+  s5 struct<f16: array<struct<f17: string, f18: struct<f19: int>>>>,
+  s6 map<string, struct<f20: array<struct<f21: struct<f22: int>>>>>
+) STORED AS ORC;
+
+INSERT INTO TABLE nested_tbl_1 SELECT
+  1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4),
+  named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))),
+  named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))),
+  map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)),
+  named_struct('f16', array(named_struct('f17', 'foo', 'f18', named_struct('f19', 14)), named_struct('f17', 'bar', 'f18', named_struct('f19', 28)))),
+  map('key1', named_struct('f20', array(named_struct('f21', named_struct('f22', 1)))),
+      'key2', named_struct('f20', array(named_struct('f21', named_struct('f22', 2)))))
+FROM dummy;
+
+DROP TABLE IF EXISTS nested_tbl_2;
+CREATE TABLE nested_tbl_2 LIKE nested_tbl_1;
+
+INSERT INTO TABLE nested_tbl_2 SELECT
+  2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4),
+  named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))),
+  named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))),
+  map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)),
+  named_struct('f16', array(named_struct('f17', 'bar', 'f18', named_struct('f19', 28)), named_struct('f17', 'foo', 'f18', named_struct('f19', 56)))),
+  map('key3', named_struct('f20', array(named_struct('f21', named_struct('f22', 3)))),
+      'key4', named_struct('f20', array(named_struct('f21', named_struct('f22', 4)))))
+FROM dummy;
+
+-- Testing only select statements
+
+EXPLAIN SELECT a FROM nested_tbl_1;
+SELECT a FROM nested_tbl_1;
+
+EXPLAIN SELECT s1.f1 FROM nested_tbl_1;
+SELECT s1.f1 FROM nested_tbl_1;
+
+EXPLAIN SELECT s1.f1, s1.f2 FROM nested_tbl_1;
+SELECT s1.f1, s1.f2 FROM nested_tbl_1;
+
+-- In this case 's1.f3' and 's1.f3.f4' should be merged
+EXPLAIN SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1;
+SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1;
+
+-- Testing select array and index shifting
+EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1;
+SELECT s1.f3.f5 FROM nested_tbl_1;
+
+-- Testing select from multiple structs
+EXPLAIN SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1;
+SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1;
+
+
+-- Testing select with filter
+
+EXPLAIN SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE;
+SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE;
+
+EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4;
+SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4;
+
+EXPLAIN SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] =
TRUE; +SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE; + + +-- Testing lateral view + +EXPLAIN SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; +SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; + + +-- Testing UDFs +EXPLAIN SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; +SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; + + +-- Testing aggregations + +EXPLAIN SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; +SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; + +EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; + +EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; + + +-- Testing joins + +EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; + +EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; + +EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE; + +EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9; + +EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9; + +-- Testing insert with aliases + +DROP TABLE IF EXISTS nested_tbl_3; +CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS ORC; + +INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1; + +SELECT * FROM nested_tbl_3; + +-- Testing select struct field from elements in array or map + +EXPLAIN +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14; + +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14; + +EXPLAIN +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15; + +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15; + +EXPLAIN +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19; + +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19; + +EXPLAIN +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19; + +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19; + +EXPLAIN +SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22; + +SELECT 
count(s1.f6), s6['key1'].f20[0].f21.f22
+FROM nested_tbl_1
+GROUP BY s6['key1'].f20[0].f21.f22;
+
+EXPLAIN
+SELECT count(s1.f6), s6['key1'].f20.f21.f22
+FROM nested_tbl_1
+GROUP BY s6['key1'].f20.f21.f22;
+
+SELECT count(s1.f6), s6['key1'].f20.f21.f22
+FROM nested_tbl_1
+GROUP BY s6['key1'].f20.f21.f22;
diff --git ql/src/test/queries/clientpositive/orc_null_check.q ql/src/test/queries/clientpositive/orc_null_check.q
index 2cb1190..e5453fc 100644
--- ql/src/test/queries/clientpositive/orc_null_check.q
+++ ql/src/test/queries/clientpositive/orc_null_check.q
@@ -1,3 +1,5 @@
+SET hive.vectorized.execution.enabled=false;
+
 create table listtable(l array<string>);
 create table listtable_orc(l array<string>) stored as orc;
 
diff --git ql/src/test/queries/clientpositive/vector_null_map.q ql/src/test/queries/clientpositive/vector_null_map.q
new file mode 100644
index 0000000..bda6705
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_null_map.q
@@ -0,0 +1,21 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+create table map_txt (
+  id int,
+  content map<int, string>
+)
+row format delimited
+null defined as '\\N'
+stored as textfile
+;
+
+LOAD DATA LOCAL INPATH '../../data/files/mapNull.txt' INTO TABLE map_txt;
+
+explain vectorization expression
+select * from map_txt;
+select * from map_txt;
+
+explain vectorization expression
+select id, map_keys(content) from map_txt;
+select id, map_keys(content) from map_txt;
diff --git ql/src/test/queries/clientpositive/vector_orc_merge_incompat_schema.q ql/src/test/queries/clientpositive/vector_orc_merge_incompat_schema.q
new file mode 100644
index 0000000..b03a084
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_orc_merge_incompat_schema.q
@@ -0,0 +1,53 @@
+SET hive.vectorized.execution.enabled=true;
+
+set hive.metastore.disallow.incompatible.col.type.changes=false;
+
+CREATE TABLE orc_create_staging (
+  str STRING,
+  mp  MAP<STRING,STRING>,
+  lst ARRAY<STRING>,
+  strct STRUCT<A:STRING,B:STRING>
+) ROW FORMAT DELIMITED
+    FIELDS TERMINATED BY '|'
+    COLLECTION ITEMS TERMINATED BY ','
+    MAP KEYS TERMINATED BY ':';
+
+LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging;
+
+CREATE TABLE orc_create_complex (
+  str STRING,
+  mp MAP<STRING,STRING>,
+  lst ARRAY<STRING>,
+  strct STRUCT<A:STRING,B:STRING>,
+  val INT
+) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000");
+
+INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging;
+INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/;
+select sum(hash(*)) from orc_create_complex;
+
+-- will be merged as the schema is the same
+ALTER TABLE orc_create_complex CONCATENATE;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/;
+select sum(hash(*)) from orc_create_complex;
+
+ALTER TABLE orc_create_complex
+CHANGE COLUMN strct strct STRUCT<A:STRING,B:STRING,C:STRING>;
+
+EXPLAIN VECTORIZATION
+INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging;
+INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/;
+EXPLAIN VECTORIZATION
+select sum(hash(*)) from orc_create_complex;
+select sum(hash(*)) from orc_create_complex;
+
+-- schema is different for both files, will not be merged
+ALTER TABLE orc_create_complex CONCATENATE;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/;
+select sum(hash(*)) from orc_create_complex;
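One gap worth noting: the Java change also fills in the union writer, but none of the new q files covers a uniontype column directly. A hypothetical companion test in the same style — untested sketch; create_union() and uniontype are standard HiveQL, and vectorized support for this full path is an assumption:

    SET hive.vectorized.execution.enabled=true;
    set hive.fetch.task.conversion=none;

    -- hypothetical table: a single uniontype column backed by ORC
    create table union_demo (u uniontype<int, string>) stored as orc;
    insert into union_demo select create_union(0, 1, 'a') from src limit 1;
    insert into union_demo select create_union(1, 2, 'b') from src limit 1;

    explain vectorization expression
    select u from union_demo;
    select u from union_demo;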
diff --git ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q
new file mode 100644
index 0000000..3121ec8
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q
@@ -0,0 +1,233 @@
+set hive.fetch.task.conversion = none;
+set hive.exec.dynamic.partition.mode = nonstrict;
+set hive.strict.checks.cartesian.product=false;
+SET hive.vectorized.execution.enabled=true;
+
+-- First, create source tables
+DROP TABLE IF EXISTS dummy;
+CREATE TABLE dummy (i int);
+INSERT INTO TABLE dummy VALUES (42);
+
+DROP TABLE IF EXISTS nested_tbl_1;
+CREATE TABLE nested_tbl_1 (
+  a int,
+  s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>,
+  s2 struct<f7: string, f8: struct<f9: boolean, f10: array<int>, f11: map<string, boolean>>>,
+  s3 struct<f12: array<struct<f13: string, f14: int>>>,
+  s4 map<string, struct<f15: int>>,
+  s5 struct<f16: array<struct<f17: string, f18: struct<f19: int>>>>,
+  s6 map<string, struct<f20: array<struct<f21: struct<f22: int>>>>>
+) STORED AS ORC;
+
+INSERT INTO TABLE nested_tbl_1 SELECT
+  1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4),
+  named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))),
+  named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))),
+  map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)),
+  named_struct('f16', array(named_struct('f17', 'foo', 'f18', named_struct('f19', 14)), named_struct('f17', 'bar', 'f18', named_struct('f19', 28)))),
+  map('key1', named_struct('f20', array(named_struct('f21', named_struct('f22', 1)))),
+      'key2', named_struct('f20', array(named_struct('f21', named_struct('f22', 2)))))
+FROM dummy;
+
+DROP TABLE IF EXISTS nested_tbl_2;
+CREATE TABLE nested_tbl_2 LIKE nested_tbl_1;
+
+INSERT INTO TABLE nested_tbl_2 SELECT
+  2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4),
+  named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))),
+  named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))),
+  map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)),
+  named_struct('f16', array(named_struct('f17', 'bar', 'f18', named_struct('f19', 28)), named_struct('f17', 'foo', 'f18', named_struct('f19', 56)))),
+  map('key3', named_struct('f20', array(named_struct('f21', named_struct('f22', 3)))),
+      'key4', named_struct('f20', array(named_struct('f21', named_struct('f22', 4)))))
+FROM dummy;
+
+-- Testing only select statements
+
+EXPLAIN VECTORIZATION
+SELECT a FROM nested_tbl_1;
+SELECT a FROM nested_tbl_1;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT s1.f1 FROM nested_tbl_1;
+SELECT s1.f1 FROM nested_tbl_1;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT s1.f1, s1.f2 FROM nested_tbl_1;
+SELECT s1.f1, s1.f2 FROM nested_tbl_1;
+
+-- In this case 's1.f3' and 's1.f3.f4' should be merged
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1;
+SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1;
+
+-- Testing select array and index shifting
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT s1.f3.f5 FROM nested_tbl_1;
+SELECT s1.f3.f5 FROM nested_tbl_1;
+
+-- Testing select from multiple structs
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1;
+SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1;
+
+
+-- Testing select with filter
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE;
+SELECT
s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4; +SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE; +SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE; + + +-- Testing lateral view + +EXPLAIN VECTORIZATION EXPRESSION +SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; +SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; + + +-- Testing UDFs +EXPLAIN VECTORIZATION EXPRESSION +SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; +SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; + + +-- Testing aggregations + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; +SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; + + +-- Testing joins + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9; + +-- Testing insert with aliases + +DROP TABLE IF EXISTS nested_tbl_3; +CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS ORC; + +INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1; + +SELECT * FROM nested_tbl_3; + +-- Testing select struct field from elements in array or map + +EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14; + +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), 
s4['key1'].f15
+FROM nested_tbl_1
+GROUP BY s4['key1'].f15;
+
+SELECT count(s1.f6), s4['key1'].f15
+FROM nested_tbl_1
+GROUP BY s4['key1'].f15;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT count(s1.f6), s5.f16[0].f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16[0].f18.f19;
+
+SELECT count(s1.f6), s5.f16[0].f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16[0].f18.f19;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT count(s1.f6), s5.f16.f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16.f18.f19;
+
+SELECT count(s1.f6), s5.f16.f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16.f18.f19;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT count(s1.f6), s6['key1'].f20[0].f21.f22
+FROM nested_tbl_1
+GROUP BY s6['key1'].f20[0].f21.f22;
+
+SELECT count(s1.f6), s6['key1'].f20[0].f21.f22
+FROM nested_tbl_1
+GROUP BY s6['key1'].f20[0].f21.f22;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT count(s1.f6), s6['key1'].f20.f21.f22
+FROM nested_tbl_1
+GROUP BY s6['key1'].f20.f21.f22;
+
+SELECT count(s1.f6), s6['key1'].f20.f21.f22
+FROM nested_tbl_1
+GROUP BY s6['key1'].f20.f21.f22;
diff --git ql/src/test/queries/clientpositive/vector_orc_null_check.q ql/src/test/queries/clientpositive/vector_orc_null_check.q
new file mode 100644
index 0000000..8f415c3
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_orc_null_check.q
@@ -0,0 +1,13 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+create table listtable(l array<string>);
+create table listtable_orc(l array<string>) stored as orc;
+
+insert overwrite table listtable select array(null) from src;
+insert overwrite table listtable_orc select * from listtable;
+
+explain vectorization expression
+select size(l) from listtable_orc limit 10;
+select size(l) from listtable_orc limit 10;
+
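The vectorization_limit.q edits below all apply one pattern: every LIMIT now sits under a total ORDER BY (aggregates get an alias that joins the sort key) so the selected rows are deterministic and the golden .q.out files stop depending on scan order. A two-line illustration with a hypothetical table t:

    -- any 2 rows may come back, so golden-file comparisons can flake:
    select c from t limit 2;
    -- a total order pins down exactly which 2 rows come back:
    select c from t order by c limit 2;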
diff --git ql/src/test/queries/clientpositive/vectorization_limit.q ql/src/test/queries/clientpositive/vectorization_limit.q
index 8044484..30d5500 100644
--- ql/src/test/queries/clientpositive/vectorization_limit.q
+++ ql/src/test/queries/clientpositive/vectorization_limit.q
@@ -5,8 +5,9 @@ set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
-explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
-SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7;
+explain vectorization
+SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7;
+SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7;
 
 set hive.optimize.reducededuplication.min.reducer=1;
 set hive.limit.pushdown.memory.usage=0.3f;
@@ -14,30 +15,30 @@ set hive.limit.pushdown.memory.usage=0.3f;
 
 -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown
 
 explain vectorization detail
-select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20;
-select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20;
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20;
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20;
 
 -- deduped RS
 explain vectorization detail
-select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20;
-select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20;
+select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20;
+select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20;
 
 -- distincts
 explain vectorization detail
-select distinct(ctinyint) from alltypesorc limit 20;
-select distinct(ctinyint) from alltypesorc limit 20;
+select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20;
+select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20;
 
 explain vectorization detail
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
+select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20;
+select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20;
 
 -- limit zero
 explain vectorization detail
-select ctinyint,cdouble from alltypesorc order by ctinyint limit 0;
-select ctinyint,cdouble from alltypesorc order by ctinyint limit 0;
+select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0;
+select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0;
 
 -- 2MR (applied to last RS)
 explain vectorization detail
-select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20;
-select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20;
+select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20;
+select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20;
diff --git ql/src/test/results/clientpositive/llap/vector_null_map.q.out ql/src/test/results/clientpositive/llap/vector_null_map.q.out
new file mode 100644
index 0000000..666f7fd
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vector_null_map.q.out
@@ -0,0 +1,173 @@
+PREHOOK: query: create table map_txt (
+  id int,
+  content map<int, string>
+)
+row format delimited
+null defined as '\\N'
+stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@map_txt
+POSTHOOK: query: create table map_txt (
+  id int,
+  content map<int, string>
+)
+row format delimited
+null defined as '\\N'
+stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@map_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/mapNull.txt' INTO TABLE map_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@map_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/mapNull.txt' INTO TABLE map_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@map_txt
+PREHOOK: query: explain vectorization expression
+select * from map_txt
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select * from map_txt
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+ Map 1 + Map Operator Tree: + TableScan + alias: map_txt + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: id (type: int), content (type: map) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from map_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@map_txt +#### A masked pattern was here #### +POSTHOOK: query: select * from map_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@map_txt +#### A masked pattern was here #### +1 NULL +PREHOOK: query: explain vectorization expression +select id, map_keys(content) from map_txt +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select id, map_keys(content) from map_txt +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: map_txt + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: id (type: int), map_keys(content) (type: array) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3] + selectExpressions: VectorUDFAdaptor(map_keys(content)) -> 3:array + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat 
+ allNative: false + usesVectorUDFAdaptor: true + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select id, map_keys(content) from map_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@map_txt +#### A masked pattern was here #### +POSTHOOK: query: select id, map_keys(content) from map_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@map_txt +#### A masked pattern was here #### +1 [] diff --git ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out new file mode 100644 index 0000000..ebf6853 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out @@ -0,0 +1,305 @@ +PREHOOK: query: CREATE TABLE orc_create_staging ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_staging +POSTHOOK: query: CREATE TABLE orc_create_staging ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_create_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_create_staging +PREHOOK: query: CREATE TABLE orc_create_complex ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: CREATE TABLE orc_create_complex ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_complex +PREHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, 
type:struct, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +953053114 +PREHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +953053114 +PREHOOK: query: ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +PREHOOK: query: EXPLAIN VECTORIZATION +INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION +INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_staging + Pruned Column Paths: strct.a, 
strct.b + Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str (type: string), mp (type: map), lst (type: array), named_struct('A',strct.a,'B',strct.b,'C','c') (type: struct), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: SELECT operator: Unable to vectorize custom UDF. Encountered unsupported expr desc : Column[strct].a + vectorized: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct EXPRESSION [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION +select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION +select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 6 Data size: 21816 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(str,mp,lst,strct,val) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 
21816 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3644 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3644 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3644 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3644 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +4334574594 +PREHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +4334574594 diff --git ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out new file mode 100644 index 0000000..7ff4bb1 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out @@ -0,0 +1,2861 @@ +PREHOOK: query: DROP TABLE IF EXISTS dummy +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dummy +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE dummy (i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dummy +POSTHOOK: query: CREATE TABLE dummy (i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dummy +PREHOOK: query: INSERT INTO TABLE dummy VALUES 
(42) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dummy +POSTHOOK: query: INSERT INTO TABLE dummy VALUES (42) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dummy +POSTHOOK: Lineage: dummy.i SCRIPT [] +PREHOOK: query: DROP TABLE IF EXISTS nested_tbl_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS nested_tbl_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE nested_tbl_1 ( + a int, + s1 struct, f6: int>, + s2 struct, f11: map>>, + s3 struct>>, + s4 map>, + s5 struct>>>, + s6 map>>>> +) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nested_tbl_1 +POSTHOOK: query: CREATE TABLE nested_tbl_1 ( + a int, + s1 struct, f6: int>, + s2 struct, f11: map>>, + s3 struct>>, + s4 map>, + s5 struct>>>, + s6 map>>>> +) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nested_tbl_1 +PREHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT + 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), + named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), + named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))), + map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)), + named_struct('f16', array(named_struct('f17', 'foo', 'f18', named_struct('f19', 14)), named_struct('f17', 'bar', 'f18', named_struct('f19', 28)))), + map('key1', named_struct('f20', array(named_struct('f21', named_struct('f22', 1)))), + 'key2', named_struct('f20', array(named_struct('f21', named_struct('f22', 2))))) +FROM dummy +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Output: default@nested_tbl_1 +POSTHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT + 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), + named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), + named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))), + map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)), + named_struct('f16', array(named_struct('f17', 'foo', 'f18', named_struct('f19', 14)), named_struct('f17', 'bar', 'f18', named_struct('f19', 28)))), + map('key1', named_struct('f20', array(named_struct('f21', named_struct('f22', 1)))), + 'key2', named_struct('f20', array(named_struct('f21', named_struct('f22', 2))))) +FROM dummy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@nested_tbl_1 +POSTHOOK: Lineage: nested_tbl_1.a SIMPLE [] +POSTHOOK: Lineage: nested_tbl_1.s1 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s2 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s3 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s4 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s5 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s6 EXPRESSION [] +PREHOOK: query: DROP TABLE IF EXISTS nested_tbl_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS nested_tbl_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE nested_tbl_2 LIKE nested_tbl_1 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nested_tbl_2 +POSTHOOK: query: CREATE TABLE nested_tbl_2 LIKE nested_tbl_1 +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@nested_tbl_2 +PREHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT + 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), + named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), + named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))), + map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)), + named_struct('f16', array(named_struct('f17', 'bar', 'f18', named_struct('f19', 28)), named_struct('f17', 'foo', 'f18', named_struct('f19', 56)))), + map('key3', named_struct('f20', array(named_struct('f21', named_struct('f22', 3)))), + 'key4', named_struct('f20', array(named_struct('f21', named_struct('f22', 4))))) +FROM dummy +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Output: default@nested_tbl_2 +POSTHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT + 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), + named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), + named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))), + map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)), + named_struct('f16', array(named_struct('f17', 'bar', 'f18', named_struct('f19', 28)), named_struct('f17', 'foo', 'f18', named_struct('f19', 56)))), + map('key3', named_struct('f20', array(named_struct('f21', named_struct('f22', 3)))), + 'key4', named_struct('f20', array(named_struct('f21', named_struct('f22', 4))))) +FROM dummy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@nested_tbl_2 +POSTHOOK: Lineage: nested_tbl_2.a SIMPLE [] +POSTHOOK: Lineage: nested_tbl_2.s1 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s2 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s3 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s4 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s5 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s6 EXPRESSION [] +PREHOOK: query: EXPLAIN VECTORIZATION +SELECT a FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION +SELECT a FROM nested_tbl_1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f1 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f1 FROM nested_tbl_1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s1.f1 (type: boolean) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8] + selectExpressions: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 0:int) -> 8:boolean + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f1 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f1 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +false +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f1, s1.f2 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f1, s1.f2 FROM nested_tbl_1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f1, s1.f2 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s1.f1 (type: boolean), s1.f2 (type: string) + outputColumnNames: _col0, _col1 + Select
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 9] + selectExpressions: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 0:int) -> 8:boolean, VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 1:int) -> 9:string + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f1, s1.f2 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f1, s1.f2 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +false foo +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 10] + selectExpressions: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>, VectorUDFStructField(col 9:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 9:struct<f4:int,f5:double>) -> 10:int + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0
+ Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f4":4,"f5":5.0} 4 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5 FROM nested_tbl_1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3.f5 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s1.f3.f5 (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [9] + selectExpressions: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 1:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 9:double + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3.f4, s2.f8.f9 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s1.f3.f4 (type: int), s2.f8.f9 (type: boolean) + outputColumnNames: _col0, _col1
+ Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [9, 11] + selectExpressions: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 9:int, VectorUDFStructField(col 10:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>, col 0:int)(children: VectorUDFStructField(col 2:struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>, col 1:int) -> 10:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>) -> 11:boolean + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +4 true +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f2, s1.f1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 8:boolean, val 0)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 0:int) -> 8:boolean) + predicate: (s1.f1 = false) (type: boolean) + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f2 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [9] + selectExpressions: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 1:int) -> 9:string + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +foo +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3.f5, s1.f3.f4 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 9:int, val 4)(children: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 9:int) + predicate: (s1.f3.f4 = 4) (type: boolean) + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3.f5 (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10] + selectExpressions: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 1:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 10:double + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f2, s2.f8 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s1.f2 = 'foo') and (size(s2.f8.f10) > 1) and s2.f8.f11['key1']) (type: boolean) + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s2.f8 (type: struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: FILTER operator: Unable to vectorize custom UDF.
Encountered unsupported expr desc : Column[s2].f8.f10 + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f9":true,"f10":[10,11],"f11":{"key2":false,"key1":true}} +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s2.f8.f10 + Statistics: Num rows: 1 Data size: 3632 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 1 Data size: 3632 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s3 (type: struct<f12:array<struct<f13:string,f14:int>>>) + outputColumnNames: s3 + Statistics: Num rows: 1 Data size: 3632 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col3, _col10 + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int) + outputColumnNames: _col10 + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col10, _col11 + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col11 (type: struct<f13:string,f14:int>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col3.f12 (type: array<struct<f13:string,f14:int>>) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col10, _col11 + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col11 (type: struct<f13:string,f14:int>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false +
Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: s2.f8.f10 (type: array<int>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3632 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 1 Data size: 3632 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col3, _col10 + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int) + outputColumnNames: _col10 + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col10, _col11 + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col11 (type: struct<f13:string,f14:int>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col3.f12 (type: array<struct<f13:string,f14:int>>) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 2 Data size: 7264 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col10, _col11 + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col11 (type: struct<f13:string,f14:int>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 14528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Lateral View Forward (LATERALVIEWFORWARD) not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +10
{"f13":"foo","f14":14} +10 {"f13":"bar","f14":28} +11 {"f13":"foo","f14":14} +11 {"f13":"bar","f14":28} +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s2.f8.f10, s1.f3.f4 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (s2.f8.f10[1] pmod s1.f3.f4) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unable to vectorize custom UDF. Encountered unsupported expr desc : Column[s1].f3.f4 + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +3 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3.f5, s1.f3.f4 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s1.f3.f5 (type: double), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [9, 10] + selectExpressions: VectorUDFStructField(col 8:struct, col 1:int)(children: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 8:struct) -> 9:double, VectorUDFStructField(col 8:struct, col 0:int)(children: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 8:struct) -> 10:int + Statistics: Num rows: 1 
Data size: 316 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 10:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 9:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a 
root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: struct<f4:int,f5:double>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<f4:int,f5:double>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<f4:int,f5:double>) + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<f4:int,f5:double>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f4":4,"f5":5.0} 1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths:
s1.f3 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: struct<f4:int,f5:double>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<f4:int,f5:double>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<f4:int,f5:double>) + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<f4:int,f5:double>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<f4:int,f5:double>) + sort order: + + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<f4:int,f5:double>), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f4":4,"f5":5.0} 1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE
t2.s2.f8.f9 == FALSE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 9:int)(children: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 9:int) + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0.f3.f4 (type: int) + sort order: + + Map-reduce partition columns: _col0.f3.f4 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 9:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 9:boolean, val 0)(children: VectorUDFStructField(col 8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>, col 0:int)(children: VectorUDFStructField(col 2:struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>, col 1:int) -> 8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>) -> 9:boolean), SelectColumnIsNotNull(col 9:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 3:int) -> 9:int)) + predicate: ((s2.f8.f9 = false) and s1.f6 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s2 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>) + outputColumnNames: _col0, _col1 +
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0.f6 (type: int) + sort order: + + Map-reduce partition columns: _col0.f6 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 3:int) -> 9:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0.f3.f4 (type: int) + 1 _col0.f6 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +PREHOOK: Input: default@nested_tbl_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +POSTHOOK: Input: default@nested_tbl_2 +#### A masked pattern was here #### +5.0 {"f9":false,"f10":[20,22],"f11":{"key3":true,"key4":false}} +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here
#### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 9:int)(children: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 9:int) + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0.f3.f4 (type: int) + sort order: + + Map-reduce partition columns: _col0.f3.f4 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 9:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f6 is not null and s2.f8.f9) (type: boolean) + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s2 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0.f6 (type: int) + sort order: + + Map-reduce partition columns: _col0.f6 (type: int) + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: FILTER operator: Could not vectorize expression (mode = FILTER): Column[s2].f8.f9 + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0.f3.f4 (type: int) + 1 _col0.f6 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>) + outputColumnNames:
_col0, _col1 + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 {"f9":true,"f10":[10,11],"f11":{"key1":true,"key2":false}} +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 9:int)(children: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 9:int) + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 9] + selectExpressions: VectorUDFStructField(col 8:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>) -> 9:int + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), true (type: boolean) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), true (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyExpressions: ConstantVectorExpression(val 1) -> 10:boolean + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE +
value expressions: _col0 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Pruned Column Paths: s1.f6, s2.f8.f9 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f6 is not null and s2.f8.f9 and s2.f8.f9 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f6 (type: int), (s2.f8.f9 = true) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: FILTER operator: Unable to vectorize custom UDF. Encountered unsupported expr desc : Column[s2].f8.f9 + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM
nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s1 (type: struct,f6:int>) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f6:int>) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s2 (type: struct,f11:map>>) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f11:map>>) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + residual filter predicates: {(_col0.f1 <> _col1.f8.f9)} + Statistics: Num rows: 1 Data size: 1469 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 (type: double) + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 1469 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1469 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 9:int)(children: VectorUDFStructField(col 8:struct, col 0:int)(children: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 8:struct) -> 9:int) + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 9] + selectExpressions: VectorUDFStructField(col 8:struct, col 0:int)(children: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 8:struct) -> 9:int + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f6:int>) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Pruned Column Paths: s1.f6 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 8:int)(children: VectorUDFStructField(col 1:struct,f6:int>, col 3:int) -> 8:int) + predicate: s1.f6 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s2 (type: struct,f11:map>>), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 8] + selectExpressions: VectorUDFStructField(col 1:struct,f6:int>, col 3:int) -> 8:int + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f11:map>>) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2 + residual filter predicates: {(_col0.f1 <> _col2.f8.f9)} + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9 +POSTHOOK: type: QUERY 
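
Two details recur across the LEFT SEMI JOIN plans above: the right side is collapsed to distinct key tuples before the join (the Group By under Map 3), and only left-side columns survive in outputColumnNames. A minimal sketch of those semantics, assuming nothing from Hive's operator classes:

    import java.util.HashSet;
    import java.util.Set;

    public class SemiJoinSketch {
        public static void main(String[] args) {
            // Left rows as (joinKey, projected value); the right side of a
            // semi join only ever contributes keys.
            int[][] left = { {4, 5}, {7, 9} };
            int[] rightKeys = {4, 4, 11};

            // Deduplicate the right-hand keys, mirroring the Group By the
            // plan inserts under the semi join's right input.
            Set<Integer> build = new HashSet<>();
            for (int k : rightKeys) build.add(k);

            // A left row is emitted at most once, and only left-side
            // columns survive (outputColumnNames: _col0 in the plan).
            for (int[] row : left)
                if (build.contains(row[0]))
                    System.out.println(row[1]); // prints 5
        }
    }

The deduplication is what lets the semi join behave like an existence test rather than a multiplying join.
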
+POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: DROP TABLE IF EXISTS nested_tbl_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS nested_tbl_3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nested_tbl_3 +POSTHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nested_tbl_3 +PREHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +PREHOOK: Output: default@nested_tbl_3 +POSTHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +POSTHOOK: Output: default@nested_tbl_3@f3=4 +POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f1 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct,f6:int>, comment:null), ] +POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f2 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct,f6:int>, comment:null), ] +PREHOOK: query: SELECT * FROM nested_tbl_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_3 +PREHOOK: Input: default@nested_tbl_3@f3=4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM nested_tbl_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_3 +POSTHOOK: Input: default@nested_tbl_3@f3=4 +#### A masked pattern was here #### +false foo 4 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s3.f12, s1.f6 + Statistics: Num rows: 1 Data size: 2796 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s3.f12[0].f14 (type: int), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10, 11] + selectExpressions: VectorUDFStructField(col 9:struct, col 1:int)(children: ListIndexColScalar(col 8:array>, col 0:int)(children: VectorUDFStructField(col 3:struct>>, col 0:int) -> 8:array>) -> 9:struct) -> 10:int, VectorUDFStructField(col 1:struct,f6:int>, col 3:int) -> 11:int + Statistics: Num rows: 1 Data size: 2796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 11:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 10:int + native: false + vectorProcessingMode: HASH + 
projectedOutputColumnNums: [0] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 2796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 1 Data size: 2796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 2796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 14 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: 
+ Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f6 + Statistics: Num rows: 1 Data size: 1096 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s4['key1'].f15 (type: int), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [9, 10] + selectExpressions: VectorUDFStructField(col 8:struct, col 0:int)(children: VectorUDFMapIndexStringScalar(col 4:map>, key: key1) -> 8:struct) -> 9:int, VectorUDFStructField(col 1:struct,f6:int>, col 3:int) -> 10:int + Statistics: Num rows: 1 Data size: 1096 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 10:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 9:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1096 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 1096 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1096 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 1 Data size: 1096 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 1096 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s5.f16, s1.f6 + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s5.f16[0].f18.f19 (type: int), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [11, 12] + selectExpressions: VectorUDFStructField(col 10:struct, col 0:int)(children: VectorUDFStructField(col 9:struct>, col 1:int)(children: ListIndexColScalar(col 8:array>>, col 0:int)(children: VectorUDFStructField(col 5:struct>>>, col 0:int) -> 8:array>>) -> 9:struct>) -> 10:struct) -> 11:int, VectorUDFStructField(col 1:struct,f6:int>, col 3:int) -> 12:int + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 12:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 11:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 14 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f6, s5.f16 + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>), s5 (type: struct>>>) + outputColumnNames: s1, s5 + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s1.f6) + keys: s5.f16.f18.f19 (type: array) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
array) + sort order: + + Map-reduce partition columns: _col0 (type: array) + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: array) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: array) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 [14,28] +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f6 + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s6['key1'].f20[0].f21.f22 (type: int), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [12, 13] + selectExpressions: VectorUDFStructField(col 11:struct, col 0:int)(children: VectorUDFStructField(col 10:struct>, col 0:int)(children: ListIndexColScalar(col 9:array>>, col 0:int)(children: VectorUDFStructField(col 8:struct>>>, col 0:int)(children: 
VectorUDFMapIndexStringScalar(col 6:map>>>>, key: key1) -> 8:struct>>>) -> 9:array>>) -> 10:struct>) -> 11:struct) -> 12:int, VectorUDFStructField(col 1:struct,f6:int>, col 3:int) -> 13:int + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 13:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 12:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM 
nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 1 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f6 + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>), s6 (type: map>>>>) + outputColumnNames: s1, s6 + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s1.f6) + keys: s6['key1'].f20.f21.f22 (type: array) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: array) + sort order: + + Map-reduce partition columns: _col0 (type: array) + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: array) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: array) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22 
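
The two GROUP BY plans above fall back to row mode with "Vectorizing complex type LIST not supported" because projecting a field across an array of structs without an index (s5.f16.f18.f19, s6['key1'].f20.f21.f22) yields one value per element, so the grouping key has type array<int>. A toy model of that typing rule, with record names of mine rather than Hive's object inspectors:

    import java.util.List;
    import java.util.stream.Collectors;

    public class FieldOverListSketch {
        record F21(int f22) {}
        record Elem(F21 f21) {}   // one element of the f20 array

        public static void main(String[] args) {
            // s6['key1'].f20 is an array of structs; selecting ".f21.f22"
            // across it produces a value per element, so the GROUP BY key
            // is itself a LIST, which the vectorizer rejects.
            List<Elem> f20 = List.of(new Elem(new F21(1)));
            List<Integer> key = f20.stream()
                                    .map(e -> e.f21().f22())
                                    .collect(Collectors.toList());
            System.out.println(key); // prints [1], the key in the query result
        }
    }

Indexing the array first, as in f20[0].f21.f22, collapses the key back to a scalar int, which is why the earlier variants of these queries vectorize fully.
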
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 [1] diff --git ql/src/test/results/clientpositive/llap/vector_orc_null_check.q.out ql/src/test/results/clientpositive/llap/vector_orc_null_check.q.out new file mode 100644 index 0000000..4c2c4a0 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_orc_null_check.q.out @@ -0,0 +1,121 @@ +PREHOOK: query: create table listtable(l array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@listtable +POSTHOOK: query: create table listtable(l array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@listtable +PREHOOK: query: create table listtable_orc(l array) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@listtable_orc +POSTHOOK: query: create table listtable_orc(l array) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@listtable_orc +PREHOOK: query: insert overwrite table listtable select array(null) from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@listtable +POSTHOOK: query: insert overwrite table listtable select array(null) from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@listtable +POSTHOOK: Lineage: listtable.l EXPRESSION [] +PREHOOK: query: insert overwrite table listtable_orc select * from listtable +PREHOOK: type: QUERY +PREHOOK: Input: default@listtable +PREHOOK: Output: default@listtable_orc +POSTHOOK: query: insert overwrite table listtable_orc select * from listtable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@listtable +POSTHOOK: Output: default@listtable_orc +POSTHOOK: Lineage: listtable_orc.l SIMPLE [(listtable)listtable.FieldSchema(name:l, type:array, comment:null), ] +PREHOOK: query: explain vectorization expression +select size(l) from listtable_orc limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select size(l) from listtable_orc limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: listtable_orc + Statistics: Num rows: 500 Data size: 913920 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: size(l) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + selectExpressions: VectorUDFAdaptor(size(l)) -> 2:int + Statistics: Num rows: 500 Data size: 913920 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 18270 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 18270 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: 
vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select size(l) from listtable_orc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@listtable_orc +#### A masked pattern was here #### +POSTHOOK: query: select size(l) from listtable_orc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@listtable_orc +#### A masked pattern was here #### +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index c299796..7be4d7d 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -1,7 +1,9 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: explain vectorization +SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: explain vectorization +SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -15,6 +17,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -28,16 +33,11 @@ STAGE PLANS: expressions: cbigint (type: bigint), cdouble (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 7 - Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: double) + sort order: ++ + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -46,9 +46,32 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true allNative: false usesVectorUDFAdaptor: false vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 
(type: bigint), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -57,26 +80,26 @@ STAGE PLANS: ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --1887561756 -10011.0 --1887561756 -13877.0 --1887561756 -2281.0 --1887561756 -8881.0 --1887561756 10361.0 --1887561756 1839.0 --1887561756 9531.0 +-1887561756 -15891.0 +-1887561756 -15951.0 +-1887561756 -16008.0 +-1887561756 -16183.0 +-1887561756 -16225.0 +-1887561756 -16243.0 +-1887561756 -16296.0 PREHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -118,17 +141,16 @@ STAGE PLANS: projectedOutputColumnNums: [0, 5, 1] Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: double) - sort order: ++ + key expressions: _col0 (type: tinyint), _col1 (type: double), _col2 (type: smallint) + sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 5] + keyColumnNums: [0, 5, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumnNums: [] Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: smallint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -151,19 +173,19 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint + dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, KEY.reducesinkkey2:smallint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: smallint) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator @@ -193,11 +215,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -222,10 +244,10 @@ POSTHOOK: Input: default@alltypesorc -64 -8080.0 -8080 -64 -9842.0 -9842 PREHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -241,6 +263,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -279,14 +302,12 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkLongOperator keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] valueColumnNums: [1, 2] Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -344,22 +365,57 @@ STAGE PLANS: projectedOutputColumnNums: [0, 3] selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: 
COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 3] native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -367,11 +423,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: query: select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: query: select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -396,10 +452,10 @@ POSTHOOK: Input: default@alltypesorc -64 373.52941176470586 NULL 9370.0945309795 PREHOOK: query: explain vectorization detail -select distinct(ctinyint) from alltypesorc limit 20 +select 
distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select distinct(ctinyint) from alltypesorc limit 20 +select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -450,10 +506,11 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkObjectHashOperator keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] valueColumnNums: [] Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 @@ -525,11 +582,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 +PREHOOK: query: select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 +POSTHOOK: query: select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -554,10 +611,10 @@ POSTHOOK: Input: default@alltypesorc -64 NULL PREHOOK: query: explain vectorization detail -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -573,6 +630,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -674,22 +732,57 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -697,11 +790,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: query: select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: query: select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -726,10 +819,10 @@ POSTHOOK: Input: default@alltypesorc -64 24 NULL 2932 PREHOOK: query: explain vectorization detail -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -745,19 +838,19 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +POSTHOOK: query: select ctinyint,cdouble from alltypesorc 
order by ctinyint,cdouble limit 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### PREHOOK: query: explain vectorization detail -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -922,11 +1015,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: query: select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +POSTHOOK: query: select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/nullMap.q.out ql/src/test/results/clientpositive/null_map.q.out similarity index 100% rename from ql/src/test/results/clientpositive/nullMap.q.out rename to ql/src/test/results/clientpositive/null_map.q.out diff --git ql/src/test/results/clientpositive/orc_nested_column_pruning.q.out ql/src/test/results/clientpositive/orc_nested_column_pruning.q.out new file mode 100644 index 0000000..e75ba9d --- /dev/null +++ ql/src/test/results/clientpositive/orc_nested_column_pruning.q.out @@ -0,0 +1,1856 @@ +PREHOOK: query: DROP TABLE IF EXISTS dummy +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dummy +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE dummy (i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dummy +POSTHOOK: query: CREATE TABLE dummy (i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dummy +PREHOOK: query: INSERT INTO TABLE dummy VALUES (42) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dummy +POSTHOOK: query: INSERT INTO TABLE dummy VALUES (42) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dummy +POSTHOOK: Lineage: dummy.i SCRIPT [] +PREHOOK: query: DROP TABLE IF EXISTS nested_tbl_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS nested_tbl_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE nested_tbl_1 ( + a int, + s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>, + s2 struct<f7: string, f8: struct<f9: boolean, f10: array<int>, f11: map<string, boolean>>>, + s3 struct<f12: array<struct<f13: string, f14: int>>>, + s4 map<string, struct<f15: int>>, + s5 struct<f16: array<struct<f17: string, f18: struct<f19: int>>>>, + s6 map<string, struct<f20: array<struct<f21: struct<f22: int>>>>> +) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nested_tbl_1 +POSTHOOK: query: CREATE TABLE nested_tbl_1 ( + a int, + s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>, + s2 struct<f7: string, f8: struct<f9: boolean, f10: array<int>, f11: map<string, boolean>>>, + s3 struct<f12: array<struct<f13: string, f14: int>>>, + s4 map<string, struct<f15: int>>, + s5 struct<f16: array<struct<f17: string, f18: struct<f19: int>>>>, 
+ s6 map<string, struct<f20: array<struct<f21: struct<f22: int>>>>> +) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nested_tbl_1 +PREHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT + 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), + named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), + named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))), + map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)), + named_struct('f16', array(named_struct('f17', 'foo', 'f18', named_struct('f19', 14)), named_struct('f17', 'bar', 'f18', named_struct('f19', 28)))), + map('key1', named_struct('f20', array(named_struct('f21', named_struct('f22', 1)))), + 'key2', named_struct('f20', array(named_struct('f21', named_struct('f22', 2))))) +FROM dummy +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Output: default@nested_tbl_1 +POSTHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT + 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), + named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), + named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))), + map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)), + named_struct('f16', array(named_struct('f17', 'foo', 'f18', named_struct('f19', 14)), named_struct('f17', 'bar', 'f18', named_struct('f19', 28)))), + map('key1', named_struct('f20', array(named_struct('f21', named_struct('f22', 1)))), + 'key2', named_struct('f20', array(named_struct('f21', named_struct('f22', 2))))) +FROM dummy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@nested_tbl_1 +POSTHOOK: Lineage: nested_tbl_1.a SIMPLE [] +POSTHOOK: Lineage: nested_tbl_1.s1 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s2 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s3 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s4 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s5 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_1.s6 EXPRESSION [] +PREHOOK: query: DROP TABLE IF EXISTS nested_tbl_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS nested_tbl_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE nested_tbl_2 LIKE nested_tbl_1 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nested_tbl_2 +POSTHOOK: query: CREATE TABLE nested_tbl_2 LIKE nested_tbl_1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nested_tbl_2 +PREHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT + 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), + named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), + named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))), + map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)), + named_struct('f16', array(named_struct('f17', 'bar', 'f18', named_struct('f19', 28)), named_struct('f17', 'foo', 'f18', named_struct('f19', 56)))), + map('key3', named_struct('f20', array(named_struct('f21', named_struct('f22', 3)))), + 'key4', named_struct('f20', array(named_struct('f21', named_struct('f22', 4))))) +FROM dummy 
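The INSERT ... SELECT statements above only type-check because each constructor expression mirrors the declared nested type: named_struct pairs field names with values, array builds the array elements, and map pairs keys with (possibly struct-valued) entries. A minimal sketch of the same idiom, assuming a hypothetical table name nested_demo (not part of this patch) and reusing the single-row dummy table created earlier in this file:

  -- Hypothetical illustration only: the constructors must line up
  -- one for one with the declared nested column types.
  CREATE TABLE nested_demo (
    s struct<f1: boolean, f2: string>,
    m map<string, struct<f3: int>>
  ) STORED AS ORC;

  INSERT INTO TABLE nested_demo SELECT
    named_struct('f1', true, 'f2', 'foo'),  -- struct fields supplied by name
    map('k1', named_struct('f3', 1))        -- map values may themselves be structs
  FROM dummy;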
+PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Output: default@nested_tbl_2 +POSTHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT + 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), + named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), + named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))), + map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)), + named_struct('f16', array(named_struct('f17', 'bar', 'f18', named_struct('f19', 28)), named_struct('f17', 'foo', 'f18', named_struct('f19', 56)))), + map('key3', named_struct('f20', array(named_struct('f21', named_struct('f22', 3)))), + 'key4', named_struct('f20', array(named_struct('f21', named_struct('f22', 4))))) +FROM dummy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@nested_tbl_2 +POSTHOOK: Lineage: nested_tbl_2.a SIMPLE [] +POSTHOOK: Lineage: nested_tbl_2.s1 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s2 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s3 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s4 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s5 EXPRESSION [] +POSTHOOK: Lineage: nested_tbl_2.s6 EXPRESSION [] +PREHOOK: query: EXPLAIN SELECT a FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT a FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 +PREHOOK: query: EXPLAIN SELECT s1.f1 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f1 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f1 (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f1 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f1 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +false +PREHOOK: query: EXPLAIN SELECT s1.f1, s1.f2 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f1, s1.f2 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f1, s1.f2 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f1 (type: boolean), s1.f2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f1, s1.f2 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f1, s1.f2 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +false foo +PREHOOK: query: EXPLAIN SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f4":4,"f5":5.0} 4 +PREHOOK: query: EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map 
Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3.f5 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3.f5 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: EXPLAIN SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3.f4, s2.f8.f9 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3.f4 (type: int), s2.f8.f9 (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +4 true +PREHOOK: query: EXPLAIN SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f2, s1.f1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f1 = false) (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f2 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +foo +PREHOOK: query: EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3.f5, s1.f3.f4 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f3.f4 = 4) (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3.f5 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: EXPLAIN SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f2, s2.f8 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s1.f2 = 'foo') and (size(s2.f8.f10) > 1) and s2.f8.f11['key1']) (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s2.f8 (type: struct,f11:map>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s2.f8 FROM nested_tbl_1 
WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f9":true,"f10":[10,11],"f11":{"key2":false,"key1":true}} +PREHOOK: query: EXPLAIN SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s2.f8.f10 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s3 (type: struct>>) + outputColumnNames: s3 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col3, _col10 + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int) + outputColumnNames: _col10 + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col10, _col11 + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col11 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col3.f12 (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col10, _col11 + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col11 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: s2.f8.f10 (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic 
stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col3, _col10 + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int) + outputColumnNames: _col10 + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col10, _col11 + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col11 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col3.f12 (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 2 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col10, _col11 + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col11 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 4500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +10 {"f13":"foo","f14":14} +10 {"f13":"bar","f14":28} +11 {"f13":"foo","f14":14} +11 {"f13":"bar","f14":28} +PREHOOK: query: EXPLAIN SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s2.f8.f10, s1.f3.f4 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (s2.f8.f10[1] pmod s1.f3.f4) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +3 +PREHOOK: query: EXPLAIN SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3.f5, s1.f3.f4 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3.f5 (type: double), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 1 +PREHOOK: query: EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: struct<f4:int,f5:double>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<f4:int,f5:double>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<f4:int,f5:double>) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<f4:int,f5:double>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f4":4,"f5":5.0} 1 +PREHOOK: query: EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f3 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: struct<f4:int,f5:double>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<f4:int,f5:double>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<f4:int,f5:double>) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<f4:int,f5:double>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + 
key expressions: _col0 (type: struct<f4:int,f5:double>) + sort order: + + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<f4:int,f5:double>), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +{"f4":4,"f5":5.0} 1 +PREHOOK: query: EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0.f3.f4 (type: int) + sort order: + + Map-reduce partition columns: _col0.f3.f4 (type: int) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>) + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 1126 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s2.f8.f9 = false) and s1.f6 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 1126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s2 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1126 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0.f6 (type: int) + sort order: + + Map-reduce partition columns: _col0.f6 (type: int) + Statistics: Num rows: 1 Data size: 1126 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct<f7:string,f8:struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>>) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0.f3.f4 (type: int) + 1 _col0.f6 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct<f9:boolean,f10:array<int>,f11:map<string,boolean>>) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +PREHOOK: Input: default@nested_tbl_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +POSTHOOK: Input: default@nested_tbl_2 +#### A masked pattern was here #### +5.0 {"f9":false,"f10":[20,22],"f11":{"key3":true,"key4":false}} +PREHOOK: query: EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0.f3.f4 (type: int) + sort order: + + Map-reduce partition columns: _col0.f3.f4 (type: int) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f6:int>) + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f6 is not null and s2.f8.f9) (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>), s2 (type: struct,f11:map>>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0.f6 (type: int) + sort order: + + Map-reduce partition columns: _col0.f6 (type: int) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct,f11:map>>) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0.f3.f4 (type: int) + 1 _col0.f6 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct,f11:map>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 {"f9":true,"f10":[10,11],"f11":{"key1":true,"key2":false}} +PREHOOK: query: EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), true (type: boolean) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), true (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f6:int>) + TableScan + alias: t2 + Pruned Column Paths: s1.f6, s2.f8.f9 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s1.f6 is not null and s2.f8.f9 and s2.f8.f9 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1.f6 (type: int), (s2.f8.f9 = true) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int), true (type: boolean) + 1 _col0 (type: int), _col1 (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 
(type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f6:int>) + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s2 (type: struct,f11:map>>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f11:map>>) + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + residual filter predicates: {(_col0.f1 <> _col1.f8.f9)} + Statistics: Num rows: 1 Data size: 2251 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2251 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2251 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9 
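Because the ON clause above carries no equality conjunct, the planner has no key to hash-partition on: both sides are shuffled with an empty sort order, the join degenerates into a cross product, and the <> comparison survives only as a residual filter predicate, which is exactly what the warning flags. For contrast, a sketch of the keyed form (this is the shape of the very next test case, annotated here for emphasis; the comments are not part of the golden output):

  SELECT t1.s1.f3.f5
  FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
  ON t1.s1.f3.f4 = t2.s1.f6        -- equality conjunct: becomes the shuffle key
     AND t1.s1.f1 <> t2.s2.f8.f9;  -- non-equi conjunct: kept as a residual filter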
+PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s1.f3.f4 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct,f6:int>), s1.f3.f4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f6:int>) + TableScan + alias: t2 + Pruned Column Paths: s1.f6 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s1.f6 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s2 (type: struct,f11:map>>), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct,f11:map>>) + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2 + residual filter predicates: {(_col0.f1 <> _col2.f8.f9)} + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0.f3.f5 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1237 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> 
t2.s2.f8.f9 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +5.0 +PREHOOK: query: DROP TABLE IF EXISTS nested_tbl_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS nested_tbl_3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nested_tbl_3 +POSTHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nested_tbl_3 +PREHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +PREHOOK: Output: default@nested_tbl_3 +POSTHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +POSTHOOK: Output: default@nested_tbl_3@f3=4 +POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f1 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct,f6:int>, comment:null), ] +POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f2 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct,f6:int>, comment:null), ] +PREHOOK: query: SELECT * FROM nested_tbl_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_3 +PREHOOK: Input: default@nested_tbl_3@f3=4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM nested_tbl_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_3 +POSTHOOK: Input: default@nested_tbl_3@f3=4 +#### A masked pattern was here #### +false foo 4 +PREHOOK: query: EXPLAIN +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s3.f12, s1.f6 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s3.f12[0].f14 (type: int), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: 
Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 14 +PREHOOK: query: EXPLAIN +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f6 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s4['key1'].f15 (type: int), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 1 +PREHOOK: query: EXPLAIN +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: 
Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s5.f16, s1.f6 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s5.f16[0].f18.f19 (type: int), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 14 +PREHOOK: query: EXPLAIN +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f6, s5.f16 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s5 (type: struct<f16:array<struct<f17:string,f18:struct<f19:int>>>>) + outputColumnNames: s1, s5 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s1.f6) + keys: s5.f16.f18.f19 (type: array<int>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: array<int>) + sort order: + + Map-reduce partition columns: _col0 (type: array<int>) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: array<int>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: array<int>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 [14,28] +PREHOOK: query: EXPLAIN +SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f6 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s6['key1'].f20[0].f21.f22 (type: int), s1.f6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 1 +PREHOOK: query: EXPLAIN +SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: nested_tbl_1 + Pruned Column Paths: s1.f6 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s6 (type: map<string,struct<f20:array<struct<f21:struct<f22:int>>>>>) + outputColumnNames: s1, s6 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s1.f6) + keys: s6['key1'].f20.f21.f22 (type: array<int>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: array<int>) + sort order: + + Map-reduce partition columns: _col0 (type: array<int>) + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: array<int>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: array<int>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +#### A masked pattern was here #### +1 [1] diff --git ql/src/test/results/clientpositive/tez/vectorization_limit.q.out ql/src/test/results/clientpositive/tez/vectorization_limit.q.out deleted file mode 100644 index 5cd757a..0000000 --- ql/src/test/results/clientpositive/tez/vectorization_limit.q.out +++ /dev/null @@ -1,946 +0,0 @@ -WARNING: Comparing a bigint and a double may result in a loss of precision. 
-PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint), cdouble (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 7 - Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - - Stage: Stage-0 - Fetch Operator - limit: 7 - Processor Tree: - ListSink - -WARNING: Comparing a bigint and a double may result in a loss of precision. 
-PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --1887561756 -10011.0 --1887561756 -13877.0 --1887561756 -2281.0 --1887561756 -8881.0 --1887561756 10361.0 --1887561756 1839.0 --1887561756 9531.0 -PREHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:tinyint) - predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 1] - Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: double) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 5] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] - Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: smallint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 1, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, 
cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --64 -10462.0 -10462 --64 -15920.0 -15920 --64 -1600.0 -1600 --64 -200.0 -200 --64 -2919.0 -2919 --64 -3097.0 -3097 --64 -3586.0 -3586 --64 -4018.0 -4018 --64 -4040.0 -4040 --64 -4803.0 -4803 --64 -6907.0 -6907 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -8080.0 -8080 --64 -9842.0 -9842 -PREHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true 
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] - Select Operator - expressions: ctinyint (type: tinyint), (cdouble + 1.0D) (type: double) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 13] - selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1), count(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 13:double) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] - keys: _col0 (type: tinyint) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1, 2] - Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double), _col2 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - dataColumns: KEY._col0:tinyint, VALUE._col0:double, VALUE._col1:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) - Group By Vectorization: - aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: MERGE_PARTIAL - 
projectedOutputColumnNums: [0, 1] - keys: KEY._col0 (type: tinyint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3] - selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double - Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --46 3033.55 --47 -574.6428571428571 --48 1672.909090909091 --49 768.7659574468086 --50 -960.0192307692307 --51 -96.46341463414635 --52 2810.705882352941 --53 -532.7567567567568 --54 2712.7272727272725 --55 2385.595744680851 --56 2595.818181818182 --57 1867.0535714285713 --58 3483.2444444444445 --59 318.27272727272725 --60 1071.82 --61 914.3404255319149 --62 245.69387755102042 --63 2178.7272727272725 --64 373.52941176470586 -NULL 9370.0945309795 -PREHOOK: query: explain vectorization detail -select distinct(ctinyint) from alltypesorc limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select distinct(ctinyint) from alltypesorc limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column 
stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: ctinyint (type: tinyint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY._col0:tinyint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] - keys: KEY._col0 (type: tinyint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --46 --47 --48 --49 --50 --51 --52 --53 --54 --55 --56 --57 --58 --59 --60 --61 --62 --63 --64 -NULL -PREHOOK: query: explain vectorization detail -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] - Select Operator - expressions: ctinyint (type: tinyint), cdouble (type: double) - outputColumnNames: ctinyint, cdouble - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5] - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint, col 5:double - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: ctinyint (type: tinyint), cdouble (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] - Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY._col0:tinyint, KEY._col1:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:tinyint, col 1:double - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] - keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - Group By Vectorization: - aggregators: VectorUDAFCount(col 1:double) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0] - keys: _col0 (type: tinyint) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --46 24 --47 22 --48 29 --49 26 --50 30 --51 21 --52 33 --53 22 --54 26 --55 29 --56 36 --57 35 --58 23 --59 31 --60 27 --61 25 --62 27 --63 19 --64 24 -NULL 2932 -PREHOOK: query: explain vectorization detail -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### -PREHOOK: query: explain 
vectorization detail -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:tinyint) - predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(ctinyint) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 5:double - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: cdouble (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY._col0:double, VALUE._col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:double - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0] - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: bigint), _col0 (type: double) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - Reducer 3 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 0] - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --10462.0 -64 --1121.0 -89 --11322.0 -101 --11492.0 -78 --15920.0 -64 --4803.0 -64 --6907.0 -64 --7196.0 
-2009 --8080.0 -64 --8118.0 -80 --9842.0 -64 -10496.0 -67 -15601.0 -1733 -3520.0 -86 -4811.0 -115 -5241.0 -80 -557.0 -75 -7705.0 -88 -9452.0 -76 -NULL -32768 diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index 517cb07..7474547 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -1,7 +1,9 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: explain vectorization +SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: explain vectorization +SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -25,16 +27,11 @@ STAGE PLANS: expressions: cbigint (type: bigint), cdouble (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 7 - Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: double) + sort order: ++ + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: enabled: true @@ -45,6 +42,25 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -53,26 +69,26 @@ STAGE PLANS: ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. 
-PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --1887561756 -10011.0 --1887561756 -13877.0 --1887561756 -2281.0 --1887561756 -8881.0 --1887561756 10361.0 --1887561756 1839.0 --1887561756 9531.0 +-1887561756 -15891.0 +-1887561756 -15951.0 +-1887561756 -16008.0 +-1887561756 -16183.0 +-1887561756 -16225.0 +-1887561756 -16243.0 +-1887561756 -16296.0 PREHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -108,8 +124,8 @@ STAGE PLANS: projectedOutputColumnNums: [0, 5, 1] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: double) - sort order: ++ + key expressions: _col0 (type: tinyint), _col1 (type: double), _col2 (type: smallint) + sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false @@ -117,7 +133,6 @@ STAGE PLANS: nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: smallint) Execution mode: vectorized Map Vectorization: enabled: true @@ -140,7 +155,7 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: smallint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit @@ -160,11 +175,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by 
ctinyint,cdouble,csmallint limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -189,10 +204,10 @@ POSTHOOK: Input: default@alltypesorc -64 -8080.0 -8080 -64 -9842.0 -9842 PREHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -200,7 +215,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -245,7 +261,6 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -278,16 +293,65 @@ STAGE PLANS: expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:tinyint, 1:_col1:double] + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:tinyint, _col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select 
+          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
               Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -295,11 +359,11 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: query: select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: query: select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
@@ -324,10 +388,10 @@ POSTHOOK: Input: default@alltypesorc
 -64	373.52941176470586
 NULL	9370.0945309795
 PREHOOK: query: explain vectorization detail
-select distinct(ctinyint) from alltypesorc limit 20
+select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization detail
-select distinct(ctinyint) from alltypesorc limit 20
+select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -421,11 +485,11 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
+PREHOOK: query: select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
+POSTHOOK: query: select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
@@ -450,10 +514,10 @@ POSTHOOK: Input: default@alltypesorc
 -64
 NULL
 PREHOOK: query: explain vectorization detail
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -461,7 +525,8 @@ PLAN VECTORIZATION:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -485,7 +550,6 @@ STAGE PLANS:
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: tinyint)
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
-                  TopN Hash Memory Usage: 0.3
       Map Vectorization:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
@@ -503,6 +567,55 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:_col0:tinyint, 1:_col1:bigint]
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint), _col1 (type: bigint)
+              sort order: ++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: _col0:tinyint, _col1:bigint
+              partitionColumnCount: 0
+              scratchColumnTypeNames: []
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 20
             Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
@@ -520,11 +633,11 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
@@ -549,10 +662,10 @@ POSTHOOK: Input: default@alltypesorc
 -64	24
 NULL	2932
 PREHOOK: query: explain vectorization detail
-select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization detail
-select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -568,19 +681,19 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 PREHOOK: query: explain vectorization detail
-select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization detail
-select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -726,11 +839,11 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+PREHOOK: query: select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+POSTHOOK: query: select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####