diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index d9caa47..60660ac 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -192,7 +192,9 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT VectorizedBatchUtil.typeInfosFromStructObjectInspector( keyStructInspector), /* useExternalBuffer */ true, - binarySortableSerDe.getSortOrders())); + binarySortableSerDe.getSortOrders(), + binarySortableSerDe.getNullMarkers(), + binarySortableSerDe.getNotNullMarkers())); keyBinarySortableDeserializeToRow.init(0); final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 8e689fe..f0df2e9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -35,8 +35,8 @@ import java.util.Set; import java.util.Stack; import java.util.regex.Pattern; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.ArrayUtils; import org.apache.calcite.util.Pair; import org.apache.commons.lang3.tuple.ImmutablePair; import org.slf4j.Logger; @@ -442,6 +442,9 @@ public Vectorizer() { Set> nonVectorizedOps; + String reduceColumnSortOrder; + String reduceColumnNullOrder; + VectorTaskColumnInfo() { partitionColumnCount = 0; } @@ -488,6 +491,14 @@ public void setNonVectorizedOps(Set> nonVectori return nonVectorizedOps; } + public void setReduceColumnSortOrder(String reduceColumnSortOrder) { + this.reduceColumnSortOrder = reduceColumnSortOrder; + } + + public void setReduceColumnNullOrder(String reduceColumnNullOrder) { + this.reduceColumnNullOrder = reduceColumnNullOrder; + } + public void transferToBaseWork(BaseWork baseWork) { String[] allColumnNameArray = allColumnNames.toArray(new String[0]); @@ -513,6 +524,12 @@ public void transferToBaseWork(BaseWork baseWork) { mapWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); } + if (baseWork instanceof ReduceWork) { + ReduceWork reduceWork = (ReduceWork) baseWork; + reduceWork.setVectorReduceColumnSortOrder(reduceColumnSortOrder); + reduceWork.setVectorReduceColumnNullOrder(reduceColumnNullOrder); + } + baseWork.setAllNative(allNative); baseWork.setGroupByVectorOutput(groupByVectorOutput); baseWork.setUsesVectorUDFAdaptor(usesVectorUDFAdaptor); @@ -1150,6 +1167,8 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, return false; } + String columnSortOrder; + String columnNullOrder; try { TableDesc keyTableDesc = reduceWork.getKeyDesc(); if (LOG.isDebugEnabled()) { @@ -1157,10 +1176,11 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, } TableDesc valueTableDesc = reduceWork.getTagToValueDesc().get(reduceWork.getTag()); + Properties keyTableProperties = keyTableDesc.getProperties(); Deserializer keyDeserializer = ReflectionUtils.newInstance( keyTableDesc.getDeserializerClass(), null); - SerDeUtils.initializeSerDe(keyDeserializer, null, keyTableDesc.getProperties(), null); + SerDeUtils.initializeSerDe(keyDeserializer, null, keyTableProperties, null); ObjectInspector keyObjectInspector = keyDeserializer.getObjectInspector(); if (keyObjectInspector == null) { setNodeIssue("Key object inspector null"); @@ -1178,6 +1198,9 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName())); } + columnSortOrder = keyTableProperties.getProperty(serdeConstants.SERIALIZATION_SORT_ORDER); + columnNullOrder = keyTableProperties.getProperty(serdeConstants.SERIALIZATION_NULL_SORT_ORDER); + Deserializer valueDeserializer = ReflectionUtils.newInstance( valueTableDesc.getDeserializerClass(), null); @@ -1203,6 +1226,9 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, vectorTaskColumnInfo.setAllColumnNames(reduceColumnNames); vectorTaskColumnInfo.setAllTypeInfos(reduceTypeInfos); + vectorTaskColumnInfo.setReduceColumnSortOrder(columnSortOrder); + vectorTaskColumnInfo.setReduceColumnNullOrder(columnNullOrder); + return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index dfed017..0cb1e57 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -101,6 +101,9 @@ public ReduceWork(String name) { private boolean reduceVectorizationEnabled; private String vectorReduceEngine; + private String vectorReduceColumnSortOrder; + private String vectorReduceColumnNullOrder; + /** * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing * to keySerializeInfo of the ReduceSink @@ -260,6 +263,22 @@ public String getVectorReduceEngine() { return vectorReduceEngine; } + public void setVectorReduceColumnSortOrder(String vectorReduceColumnSortOrder) { + this.vectorReduceColumnSortOrder = vectorReduceColumnSortOrder; + } + + public String getVectorReduceColumnSortOrder() { + return vectorReduceColumnSortOrder; + } + + public void setVectorReduceColumnNullOrder(String vectorReduceColumnNullOrder) { + this.vectorReduceColumnNullOrder = vectorReduceColumnNullOrder; + } + + public String getVectorReduceColumnNullOrder() { + return vectorReduceColumnNullOrder; + } + // Use LinkedHashSet to give predictable display order. private static Set reduceVectorizableEngines = new LinkedHashSet(Arrays.asList("tez", "spark")); @@ -311,6 +330,22 @@ public ReduceExplainVectorization(ReduceWork reduceWork) { } return VectorizationCondition.getConditionsNotMet(reduceVectorizationConditions); } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "reduceColumnSortOrder", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getReduceColumnSortOrder() { + if (!getVectorizationExamined()) { + return null; + } + return reduceWork.getVectorReduceColumnSortOrder(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "reduceColumnNullOrder", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getReduceColumnNullOrder() { + if (!getVectorizationExamined()) { + return null; + } + return reduceWork.getVectorReduceColumnNullOrder(); + } } @Explain(vectorization = Vectorization.SUMMARY, displayName = "Reduce Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java index b29bb8b..822fff2 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java @@ -579,8 +579,6 @@ void testVectorDeserializeRow(Random r, SerializationType serializationType, for (int i = 0; i < fieldCount; i++) { columnSortOrderIsDesc[i] = r.nextBoolean(); } - deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer, - columnSortOrderIsDesc); byte[] columnNullMarker = new byte[fieldCount]; byte[] columnNotNullMarker = new byte[fieldCount]; @@ -598,6 +596,9 @@ void testVectorDeserializeRow(Random r, SerializationType serializationType, } } serializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); + deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); + } boolean useBinarySortableCharsNeedingEscape = alternate2; if (useBinarySortableCharsNeedingEscape) { diff --git ql/src/test/queries/clientpositive/vector_order_null.q ql/src/test/queries/clientpositive/vector_order_null.q new file mode 100644 index 0000000..d2034c3 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_order_null.q @@ -0,0 +1,56 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create table src_null (a int, b string); +insert into src_null values (1, 'A'); +insert into src_null values (null, null); +insert into src_null values (3, null); +insert into src_null values (2, null); +insert into src_null values (2, 'A'); +insert into src_null values (2, 'B'); + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc; +SELECT x.* FROM src_null x ORDER BY a asc; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc; +SELECT x.* FROM src_null x ORDER BY a desc; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last; +SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc, a asc; +SELECT x.* FROM src_null x ORDER BY b desc, a asc; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls first; +SELECT x.* FROM src_null x ORDER BY a asc nulls first; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc nulls first; +SELECT x.* FROM src_null x ORDER BY a desc nulls first; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls last, a; +SELECT x.* FROM src_null x ORDER BY b asc nulls last, a; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a; +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc; +SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last; +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last; + +EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last; +SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last; diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index c3809d3..cfc87fb 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -126,6 +126,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -288,6 +290,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 2e980b9..6c6d0f3 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -5991,6 +5991,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -6180,6 +6182,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -6371,6 +6375,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -6562,6 +6568,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -6756,6 +6764,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -6950,6 +6960,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -7144,6 +7156,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -7327,6 +7341,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -7515,6 +7531,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -7719,6 +7737,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -7959,6 +7979,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -8158,6 +8180,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -8416,6 +8440,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -8636,6 +8662,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -8868,6 +8896,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -9103,6 +9133,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -9338,6 +9370,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -9575,6 +9609,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -9849,6 +9885,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -10232,6 +10270,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -10424,6 +10464,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -10618,6 +10660,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -10812,6 +10856,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -11009,6 +11055,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -11206,6 +11254,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -11403,6 +11453,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -11589,6 +11641,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -11780,6 +11834,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -11987,6 +12043,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -12227,6 +12285,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -12429,6 +12489,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -12687,6 +12749,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -12907,6 +12971,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -13139,6 +13205,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -13374,6 +13442,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -13609,6 +13679,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -13846,6 +13918,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -14126,6 +14200,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -14512,6 +14588,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -14704,6 +14782,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -14898,6 +14978,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -15092,6 +15174,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -15289,6 +15373,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -15486,6 +15572,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -15683,6 +15771,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -15869,6 +15959,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -16060,6 +16152,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -16267,6 +16361,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -16507,6 +16603,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -16709,6 +16807,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -16967,6 +17067,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -17187,6 +17289,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -17419,6 +17523,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -17654,6 +17760,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -17889,6 +17997,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -18126,6 +18236,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false @@ -18406,6 +18518,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false diff --git ql/src/test/results/clientpositive/llap/vector_order_null.q.out ql/src/test/results/clientpositive/llap/vector_order_null.q.out new file mode 100644 index 0000000..64f9ab3 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_order_null.q.out @@ -0,0 +1,1364 @@ +PREHOOK: query: create table src_null (a int, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_null +POSTHOOK: query: create table src_null (a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_null +PREHOOK: query: insert into src_null values (1, 'A') +PREHOOK: type: QUERY +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (1, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into src_null values (null, null) +PREHOOK: type: QUERY +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (null, null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into src_null values (3, null) +PREHOOK: type: QUERY +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (3, null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into src_null values (2, null) +PREHOOK: type: QUERY +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into src_null values (2, 'A') +PREHOOK: type: QUERY +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into src_null values (2, 'B') +PREHOOK: type: QUERY +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'B') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +NULL NULL +1 A +2 NULL +2 A +2 B +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: - + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +3 NULL +2 NULL +2 A +2 B +1 A +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 NULL +3 NULL +NULL NULL +1 A +2 A +2 B +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc, a asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc, a asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: -+ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: -+ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 B +1 A +2 A +NULL NULL +2 NULL +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls first +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls first +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +NULL NULL +1 A +2 NULL +2 A +2 B +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc nulls first +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc nulls first +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: - + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +NULL NULL +3 NULL +2 NULL +2 A +2 B +1 A +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +1 A +2 A +2 B +NULL NULL +2 NULL +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: -+ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: -+ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 B +1 A +2 A +NULL NULL +2 NULL +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: +- + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: zz + reduceColumnSortOrder: +- + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +1 A +2 B +2 A +2 NULL +3 NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: -- + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: zz + reduceColumnSortOrder: -- + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 B +2 A +1 A +3 NULL +2 NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 NULL +3 NULL +NULL NULL +1 A +2 A +2 B diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index a39f59d..c089d00 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -846,6 +846,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index e3104d2..f1319a9 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -432,6 +432,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index da5b7d3..098b898 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -852,6 +852,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index 1c9a9ea..f238b0a 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -433,6 +433,8 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java index 41087dc..19d4550 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java @@ -54,6 +54,9 @@ // The sort order (ascending/descending) for each field. Set to true when descending (invert). private boolean[] columnSortOrderIsDesc; + byte[] columnNullMarker; + byte[] columnNotNullMarker; + // Which field we are on. We start with -1 so readNextField can increment once and the read // field data methods don't increment. private int fieldIndex; @@ -80,19 +83,40 @@ */ public BinarySortableDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos, boolean useExternalBuffer) { - this(primitiveTypeInfos, useExternalBuffer, null); + this(primitiveTypeInfos, useExternalBuffer, null, null, null); } public BinarySortableDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer, - boolean[] columnSortOrderIsDesc) { + boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker) { super(typeInfos, useExternalBuffer); - fieldCount = typeInfos.length; + final int count = typeInfos.length; + fieldCount = count; if (columnSortOrderIsDesc != null) { this.columnSortOrderIsDesc = columnSortOrderIsDesc; } else { - this.columnSortOrderIsDesc = new boolean[typeInfos.length]; + this.columnSortOrderIsDesc = new boolean[count]; Arrays.fill(this.columnSortOrderIsDesc, false); } + if (columnNullMarker != null) { + this.columnNullMarker = columnNullMarker; + this.columnNotNullMarker = columnNotNullMarker; + } else { + this.columnNullMarker = new byte[count]; + this.columnNotNullMarker = new byte[count]; + for (int i = 0; i < count; i++) { + if (this.columnSortOrderIsDesc[i]) { + // Descending + // Null last (default for descending order) + this.columnNullMarker[i] = BinarySortableSerDe.ZERO; + this.columnNotNullMarker[i] = BinarySortableSerDe.ONE; + } else { + // Ascending + // Null first (default for ascending order) + this.columnNullMarker[i] = BinarySortableSerDe.ZERO; + this.columnNotNullMarker[i] = BinarySortableSerDe.ONE; + } + } + } inputByteBuffer = new InputByteBuffer(); internalBufferLen = -1; } @@ -142,6 +166,11 @@ public String getDetailedReadPositionString() { } sb.append(" column sort order "); sb.append(Arrays.toString(columnSortOrderIsDesc)); + // UNDONE: Convert byte 0 or 1 to character. + sb.append(" column null marker "); + sb.append(Arrays.toString(columnNullMarker)); + sb.append(" column non null marker "); + sb.append(Arrays.toString(columnNotNullMarker)); return sb.toString(); } @@ -175,7 +204,7 @@ public boolean readNextField() throws IOException { byte isNullByte = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]); - if (isNullByte == 0) { + if (isNullByte == columnNullMarker[fieldIndex]) { return false; } diff --git serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java index 5f5b03a..b369462 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java @@ -119,7 +119,9 @@ private void testBinarySortableFast( new BinarySortableDeserializeRead( primitiveTypeInfos, /* useExternalBuffer */ false, - columnSortOrderIsDesc); + columnSortOrderIsDesc, + columnNullMarker, + columnNotNullMarker); BytesWritable bytesWritable = serializeWriteBytes[i]; binarySortableDeserializeRead.set( @@ -147,7 +149,9 @@ private void testBinarySortableFast( new BinarySortableDeserializeRead( primitiveTypeInfos, /* useExternalBuffer */ false, - columnSortOrderIsDesc); + columnSortOrderIsDesc, + columnNullMarker, + columnNotNullMarker); binarySortableDeserializeRead2.set( bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1); // One fewer byte. @@ -280,7 +284,9 @@ private void testBinarySortableFast( new BinarySortableDeserializeRead( primitiveTypeInfos, /* useExternalBuffer */ false, - columnSortOrderIsDesc); + columnSortOrderIsDesc, + columnNullMarker, + columnNotNullMarker); BytesWritable bytesWritable = serdeBytes[i]; @@ -366,6 +372,7 @@ private void testBinarySortableFastCase(int caseNum, boolean doNonRandomFill, Ra boolean[] columnSortOrderIsDesc = new boolean[columnCount]; Arrays.fill(columnSortOrderIsDesc, false); + byte[] columnNullMarker = new byte[columnCount]; Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO); byte[] columnNotNullMarker = new byte[columnCount];