diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java index 74b9c58..c0b74ab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java @@ -1007,6 +1007,8 @@ public Object getWritableKeyValue(VectorHashKeyWrapper kw, int keyIndex, case DECIMAL: return keyOutputWriter.writeValue( kw.getDecimal(columnTypeSpecificIndex)); + case DECIMAL_64: + throw new RuntimeException("Getting writable for DECIMAL_64 not supported"); case TIMESTAMP: return keyOutputWriter.writeValue( kw.getTimestamp(columnTypeSpecificIndex)); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index b8d7150..2584d28 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -98,12 +98,9 @@ public VectorMapJoinOperator (CompilationOpContext ctx, OperatorDesc conf, bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), VectorExpressionDescriptor.Mode.FILTER); - List keyDesc = desc.getKeys().get(posBigTable); - keyExpressions = vContext.getVectorExpressions(keyDesc); + keyExpressions = this.vectorDesc.getAllBigTableKeyExpressions(); - // We're only going to evaluate the big table vectorized expressions, - Map> exprs = desc.getExprs(); - bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); + bigTableValueExpressions = this.vectorDesc.getAllBigTableValueExpressions(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 13eff51..d5fccb5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -696,6 +696,20 @@ private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc return expr; } + public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List exprNodes) + throws HiveException { + VectorExpression[] vecExprs = + getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); + final int size = vecExprs.length; + for (int i = 0; i < size; i++) { + VectorExpression vecExpr = vecExprs[i]; + if (vecExpr.getOutputColumnVectorType() == ColumnVector.Type.DECIMAL_64) { + vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr); + } + } + return vecExprs; + } + public VectorExpression[] getVectorExpressions(List exprNodes) throws HiveException { return getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index 4afbc03..b1fe818 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -253,12 +253,12 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames(); bigTableKeyTypeInfos = vectorMapJoinInfo.getBigTableKeyTypeInfos(); - bigTableKeyExpressions = vectorMapJoinInfo.getBigTableKeyExpressions(); + bigTableKeyExpressions = vectorMapJoinInfo.getSlimmedBigTableKeyExpressions(); bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap(); bigTableValueColumnNames = vectorMapJoinInfo.getBigTableValueColumnNames(); bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos(); - bigTableValueExpressions = vectorMapJoinInfo.getBigTableValueExpressions(); + bigTableValueExpressions = vectorMapJoinInfo.getSlimmedBigTableValueExpressions(); bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 263d2c7..810791a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -3008,7 +3008,9 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // information first.... List keyDesc = desc.getKeys().get(posBigTable); - VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(keyDesc); + + // For now, we don't support joins on or using DECIMAL_64. + VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keyDesc); final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length; boolean supportsKeyTypes = true; // Assume. HashSet notSupportedKeyTypes = new HashSet(); @@ -3019,7 +3021,7 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength]; TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength]; ArrayList bigTableKeyExpressionsList = new ArrayList(); - VectorExpression[] bigTableKeyExpressions; + VectorExpression[] slimmedBigTableKeyExpressions; for (int i = 0; i < allBigTableKeyExpressionsLength; i++) { VectorExpression ve = allBigTableKeyExpressions[i]; if (!IdentityExpression.isColumnOnly(ve)) { @@ -3043,13 +3045,15 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi bigTableKeyTypeInfos[i] = typeInfo; } if (bigTableKeyExpressionsList.size() == 0) { - bigTableKeyExpressions = null; + slimmedBigTableKeyExpressions = null; } else { - bigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]); + slimmedBigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]); } List bigTableExprs = desc.getExprs().get(posBigTable); - VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs); + + // For now, we don't support joins on or using DECIMAL_64. + VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressionsUpConvertDecimal64(bigTableExprs); boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, @@ -3071,7 +3075,7 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length]; TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length]; ArrayList bigTableValueExpressionsList = new ArrayList(); - VectorExpression[] bigTableValueExpressions; + VectorExpression[] slimmedBigTableValueExpressions; for (int i = 0; i < bigTableValueColumnMap.length; i++) { VectorExpression ve = allBigTableValueExpressions[i]; if (!IdentityExpression.isColumnOnly(ve)) { @@ -3084,20 +3088,24 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi bigTableValueTypeInfos[i] = exprNode.getTypeInfo(); } if (bigTableValueExpressionsList.size() == 0) { - bigTableValueExpressions = null; + slimmedBigTableValueExpressions = null; } else { - bigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]); + slimmedBigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]); } vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap); vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames); vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos); - vectorMapJoinInfo.setBigTableKeyExpressions(bigTableKeyExpressions); + vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions); + + vectorDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions); vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap); vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames); vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos); - vectorMapJoinInfo.setBigTableValueExpressions(bigTableValueExpressions); + vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions); + + vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions); /* * Small table information. @@ -3948,7 +3956,10 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { GroupByDesc groupByDesc = (GroupByDesc) groupByOp.getConf(); List keysDesc = groupByDesc.getKeys(); - VectorExpression[] vecKeyExpressions = vContext.getVectorExpressions(keysDesc); + + // For now, we don't support group by on DECIMAL_64 keys. + VectorExpression[] vecKeyExpressions = + vContext.getVectorExpressionsUpConvertDecimal64(keysDesc); ArrayList aggrDesc = groupByDesc.getAggregators(); final int size = aggrDesc.size(); @@ -4425,10 +4436,10 @@ private static VectorPTFInfo createVectorPTFInfo(Operator getBigTableKeyExpressions() { - if (!isNative) { - return null; - } - return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableKeyExpressions()); + return vectorExpressionsToStringList( + isNative ? + vectorMapJoinInfo.getSlimmedBigTableKeyExpressions() : + vectorMapJoinDesc.getAllBigTableKeyExpressions()); } @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) @@ -500,10 +501,10 @@ public String getBigTableKeyColumnNums() { @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getBigTableValueExpressions() { - if (!isNative) { - return null; - } - return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableValueExpressions()); + return vectorExpressionsToStringList( + isNative ? + vectorMapJoinInfo.getSlimmedBigTableValueExpressions() : + vectorMapJoinDesc.getAllBigTableValueExpressions()); } @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java index 99a4958..99602a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java @@ -20,6 +20,7 @@ import java.util.List; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -97,6 +98,9 @@ public PrimitiveTypeInfo getPrimitiveTypeInfo() { private VectorMapJoinVariation vectorMapJoinVariation; private boolean minMaxEnabled; + private VectorExpression[] allBigTableKeyExpressions; + private VectorExpression[] allBigTableValueExpressions; + private VectorMapJoinInfo vectorMapJoinInfo; public VectorMapJoinDesc() { @@ -105,6 +109,10 @@ public VectorMapJoinDesc() { hashTableKeyType = HashTableKeyType.NONE; vectorMapJoinVariation = VectorMapJoinVariation.NONE; minMaxEnabled = false; + + allBigTableKeyExpressions = null; + allBigTableValueExpressions = null; + vectorMapJoinInfo = null; } @@ -162,6 +170,22 @@ public void setMinMaxEnabled(boolean minMaxEnabled) { this.minMaxEnabled = minMaxEnabled; } + public VectorExpression[] getAllBigTableKeyExpressions() { + return allBigTableKeyExpressions; + } + + public void setAllBigTableKeyExpressions(VectorExpression[] allBigTableKeyExpressions) { + this.allBigTableKeyExpressions = allBigTableKeyExpressions; + } + + public VectorExpression[] getAllBigTableValueExpressions() { + return allBigTableValueExpressions; + } + + public void setAllBigTableValueExpressions(VectorExpression[] allBigTableValueExpressions) { + this.allBigTableValueExpressions = allBigTableValueExpressions; + } + public void setVectorMapJoinInfo(VectorMapJoinInfo vectorMapJoinInfo) { Preconditions.checkState(vectorMapJoinInfo != null); this.vectorMapJoinInfo = vectorMapJoinInfo; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java index 7432efa..36d6460 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java @@ -41,12 +41,12 @@ private int[] bigTableKeyColumnMap; private String[] bigTableKeyColumnNames; private TypeInfo[] bigTableKeyTypeInfos; - private VectorExpression[] bigTableKeyExpressions; + private VectorExpression[] slimmedBigTableKeyExpressions; private int[] bigTableValueColumnMap; private String[] bigTableValueColumnNames; private TypeInfo[] bigTableValueTypeInfos; - private VectorExpression[] bigTableValueExpressions; + private VectorExpression[] slimmedBigTableValueExpressions; private VectorColumnOutputMapping bigTableRetainedMapping; private VectorColumnOutputMapping bigTableOuterKeyMapping; @@ -58,12 +58,12 @@ public VectorMapJoinInfo() { bigTableKeyColumnMap = null; bigTableKeyColumnNames = null; bigTableKeyTypeInfos = null; - bigTableKeyExpressions = null; + slimmedBigTableKeyExpressions = null; bigTableValueColumnMap = null; bigTableValueColumnNames = null; bigTableValueTypeInfos = null; - bigTableValueExpressions = null; + slimmedBigTableValueExpressions = null; bigTableRetainedMapping = null; bigTableOuterKeyMapping = null; @@ -96,12 +96,12 @@ public void setBigTableKeyTypeInfos(TypeInfo[] bigTableKeyTypeInfos) { this.bigTableKeyTypeInfos = bigTableKeyTypeInfos; } - public VectorExpression[] getBigTableKeyExpressions() { - return bigTableKeyExpressions; + public VectorExpression[] getSlimmedBigTableKeyExpressions() { + return slimmedBigTableKeyExpressions; } - public void setBigTableKeyExpressions(VectorExpression[] bigTableKeyExpressions) { - this.bigTableKeyExpressions = bigTableKeyExpressions; + public void setSlimmedBigTableKeyExpressions(VectorExpression[] slimmedBigTableKeyExpressions) { + this.slimmedBigTableKeyExpressions = slimmedBigTableKeyExpressions; } @@ -129,12 +129,12 @@ public void setBigTableValueTypeInfos(TypeInfo[] bigTableValueTypeInfos) { this.bigTableValueTypeInfos = bigTableValueTypeInfos; } - public VectorExpression[] getBigTableValueExpressions() { - return bigTableValueExpressions; + public VectorExpression[] getSlimmedBigTableValueExpressions() { + return slimmedBigTableValueExpressions; } - public void setBigTableValueExpressions(VectorExpression[] bigTableValueExpressions) { - this.bigTableValueExpressions = bigTableValueExpressions; + public void setSlimmedBigTableValueExpressions(VectorExpression[] slimmedBigTableValueExpressions) { + this.slimmedBigTableValueExpressions = slimmedBigTableValueExpressions; } public void setBigTableRetainedMapping(VectorColumnOutputMapping bigTableRetainedMapping) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java index eec1f65..6a03924 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java @@ -211,12 +211,16 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t vectorMapJoinInfo.setBigTableKeyColumnMap(testDesc.bigTableKeyColumnNums); vectorMapJoinInfo.setBigTableKeyColumnNames(testDesc.bigTableKeyColumnNames); vectorMapJoinInfo.setBigTableKeyTypeInfos(testDesc.bigTableKeyTypeInfos); - vectorMapJoinInfo.setBigTableKeyExpressions(null); + vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(null); + + vectorDesc.setAllBigTableKeyExpressions(null); vectorMapJoinInfo.setBigTableValueColumnMap(new int[0]); vectorMapJoinInfo.setBigTableValueColumnNames(new String[0]); vectorMapJoinInfo.setBigTableValueTypeInfos(new TypeInfo[0]); - vectorMapJoinInfo.setBigTableValueExpressions(null); + vectorMapJoinInfo.setSlimmedBigTableValueExpressions(null); + + vectorDesc.setAllBigTableValueExpressions(null); VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); diff --git ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q index 495be4d..940cc12 100644 --- ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q +++ ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q @@ -22,10 +22,10 @@ STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k; -CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC; -INSERT INTO TABLE t1 select `dec` from over1k; -CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC; -INSERT INTO TABLE t2 select `dec` from over1k; +CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC; +INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k; +CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC; +INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k; explain vectorization detail select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`); @@ -34,6 +34,13 @@ select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`); select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`); +explain vectorization detail +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`); + +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`); + + + -- DECIMAL_64 CREATE TABLE over1k_small(t tinyint, @@ -45,17 +52,17 @@ CREATE TABLE over1k_small(t tinyint, bo boolean, s string, ts timestamp, - `dec` decimal(4,2), + `dec` decimal(14,2), bin binary) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_small; -CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC; -INSERT INTO TABLE t1 select `dec` from over1k_small; -CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC; -INSERT INTO TABLE t2 select `dec` from over1k_small; +CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE; +INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small; +CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE; +INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small; explain vectorization detail select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); @@ -63,3 +70,24 @@ select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.` -- SORT_QUERY_RESULTS select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + + +set hive.vectorized.input.format.supports.enabled=none; + +explain vectorization detail +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +-- SORT_QUERY_RESULTS + +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + diff --git ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index 3c302b6..aca8dc0 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -38,40 +38,42 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC +PREHOOK: query: CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t1 -POSTHOOK: query: CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k +PREHOOK: query: INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k PREHOOK: type: QUERY PREHOOK: Input: default@over1k PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k +POSTHOOK: query: INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t1 POSTHOOK: Lineage: t1.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(20,2), comment:null), ] -PREHOOK: query: CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC +POSTHOOK: Lineage: t1.value_dec EXPRESSION [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] +PREHOOK: query: CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t2 -POSTHOOK: query: CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t2 -PREHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k +PREHOOK: query: INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k PREHOOK: type: QUERY PREHOOK: Input: default@over1k PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k +POSTHOOK: query: INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(20,2), comment:null), ] +POSTHOOK: Lineage: t2.value_dec EXPRESSION [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] PREHOOK: query: explain vectorization detail select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`) PREHOOK: type: QUERY @@ -101,7 +103,7 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:value_dec:decimal(22,2), 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -124,6 +126,8 @@ STAGE PLANS: 0 _col0 (type: decimal(26,2)) 1 _col0 (type: decimal(26,2)) Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(26,2) + bigTableValueExpressions: col 0:decimal(26,2) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true @@ -155,9 +159,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(22,2) + dataColumns: dec:decimal(22,2), value_dec:decimal(22,2) partitionColumnCount: 0 scratchColumnTypeNames: [] Map 2 @@ -167,7 +171,7 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:value_dec:decimal(24,0), 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -206,9 +210,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(24,0) + dataColumns: dec:decimal(24,0), value_dec:decimal(24,0) partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -334,6 +338,271 @@ POSTHOOK: Input: default@t2 9.00 9 9.00 9 9.00 9 +PREHOOK: query: explain vectorization detail +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1049 Data size: 223552 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:value_dec:decimal(22,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(22,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(22,2)), value_dec (type: decimal(22,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(26,2)) + 1 _col0 (type: decimal(26,2)) + Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(26,2) + bigTableValueExpressions: col 0:decimal(26,2), col 1:decimal(22,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 1096 Data size: 233717 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1096 Data size: 233717 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(22,2), value_dec:decimal(22,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(24,0)] + Map 2 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 1049 Data size: 223552 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:value_dec:decimal(24,0), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(24,0)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(24,0)), value_dec (type: decimal(24,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(26,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(26,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(24,0)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(24,0), value_dec:decimal(24,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +14.00 33.66 14 10 +14.00 33.66 14 22 +14.00 33.66 14 34 +14.00 33.66 14 39 +14.00 33.66 14 42 +14.00 33.66 14 45 +14.00 33.66 14 46 +14.00 33.66 14 49 +14.00 33.66 14 5 +17.00 14.26 17 1 +17.00 14.26 17 14 +17.00 14.26 17 16 +17.00 14.26 17 19 +17.00 14.26 17 2 +17.00 14.26 17 22 +17.00 14.26 17 29 +17.00 14.26 17 3 +17.00 14.26 17 4 +17.00 14.26 17 44 +45.00 23.55 45 1 +45.00 23.55 45 2 +45.00 23.55 45 22 +45.00 23.55 45 24 +45.00 23.55 45 42 +6.00 29.78 6 16 +6.00 29.78 6 28 +6.00 29.78 6 30 +6.00 29.78 6 34 +6.00 29.78 6 36 +6.00 29.78 6 44 +62.00 21.02 62 15 +62.00 21.02 62 15 +62.00 21.02 62 21 +62.00 21.02 62 21 +62.00 21.02 62 22 +62.00 21.02 62 25 +62.00 21.02 62 29 +62.00 21.02 62 3 +62.00 21.02 62 34 +62.00 21.02 62 47 +62.00 21.02 62 47 +62.00 21.02 62 49 +64.00 37.76 64 0 +64.00 37.76 64 10 +64.00 37.76 64 10 +64.00 37.76 64 13 +64.00 37.76 64 23 +64.00 37.76 64 25 +64.00 37.76 64 26 +64.00 37.76 64 27 +64.00 37.76 64 27 +64.00 37.76 64 30 +64.00 37.76 64 32 +64.00 37.76 64 34 +64.00 37.76 64 35 +64.00 37.76 64 38 +64.00 37.76 64 40 +64.00 37.76 64 43 +64.00 37.76 64 5 +64.00 37.76 64 50 +70.00 24.59 70 2 +70.00 24.59 70 25 +70.00 24.59 70 27 +70.00 24.59 70 28 +70.00 24.59 70 3 +70.00 24.59 70 32 +70.00 24.59 70 44 +79.00 15.12 79 1 +79.00 15.12 79 15 +79.00 15.12 79 25 +79.00 15.12 79 30 +79.00 15.12 79 35 +79.00 15.12 79 35 +89.00 15.09 89 1 +89.00 15.09 89 15 +89.00 15.09 89 23 +89.00 15.09 89 27 +89.00 15.09 89 28 +89.00 15.09 89 29 +89.00 15.09 89 30 +89.00 15.09 89 32 +89.00 15.09 89 39 +89.00 15.09 89 40 +89.00 15.09 89 45 +89.00 15.09 89 7 +9.00 48.96 9 12 +9.00 48.96 9 15 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 20 +9.00 48.96 9 20 +9.00 48.96 9 21 +9.00 48.96 9 21 +9.00 48.96 9 26 +9.00 48.96 9 27 +9.00 48.96 9 34 +9.00 48.96 9 38 +9.00 48.96 9 41 +9.00 48.96 9 42 +9.00 48.96 9 45 +9.00 48.96 9 48 +9.00 48.96 9 49 +9.00 48.96 9 5 +9.00 48.96 9 7 +9.00 48.96 9 7 PREHOOK: query: CREATE TABLE over1k_small(t tinyint, si smallint, i int, @@ -343,7 +612,7 @@ PREHOOK: query: CREATE TABLE over1k_small(t tinyint, bo boolean, s string, ts timestamp, - `dec` decimal(4,2), + `dec` decimal(14,2), bin binary) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE @@ -359,7 +628,7 @@ POSTHOOK: query: CREATE TABLE over1k_small(t tinyint, bo boolean, s string, ts timestamp, - `dec` decimal(4,2), + `dec` decimal(14,2), bin binary) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE @@ -374,40 +643,42 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@over1k_small -PREHOOK: query: CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC +PREHOOK: query: CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t1_small -POSTHOOK: query: CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t1_small -PREHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k_small +PREHOOK: query: INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small PREHOOK: type: QUERY PREHOOK: Input: default@over1k_small -PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k_small +PREHOOK: Output: default@t1_small +POSTHOOK: query: INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_small -POSTHOOK: Output: default@t1 -POSTHOOK: Lineage: t1.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -PREHOOK: query: CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC +POSTHOOK: Output: default@t1_small +POSTHOOK: Lineage: t1_small.dec SIMPLE [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(14,2), comment:null), ] +POSTHOOK: Lineage: t1_small.value_dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:d, type:double, comment:null), ] +PREHOOK: query: CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t2_small -POSTHOOK: query: CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t2_small -PREHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k_small +PREHOOK: query: INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small PREHOOK: type: QUERY PREHOOK: Input: default@over1k_small -PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k_small +PREHOOK: Output: default@t2_small +POSTHOOK: query: INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_small -POSTHOOK: Output: default@t2 -POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Output: default@t2_small +POSTHOOK: Lineage: t2_small.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(14,2), comment:null), ] +POSTHOOK: Lineage: t2_small.value_dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:d, type:double, comment:null), ] PREHOOK: query: explain vectorization detail select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) PREHOOK: type: QUERY @@ -434,32 +705,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1_small - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(4,2), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,2)) + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2)) predicate: dec is not null (type: boolean) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(4,2)) + expressions: dec (type: decimal(14,2)) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: decimal(6,2)) - 1 _col0 (type: decimal(6,2)) + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(16,2) + bigTableValueExpressions: col 0:decimal(16,2) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true @@ -468,83 +741,85 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap - LLAP IO: all inputs + LLAP IO: no inputs Map Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(4,2) + dataColumns: dec:decimal(14,2), value_dec:decimal(14,2) partitionColumnCount: 0 scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan alias: t2_small - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(4,0), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(14,0), 1:value_dec:decimal(14,0), 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,0)) + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,0)) predicate: dec is not null (type: boolean) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(4,0)) + expressions: dec (type: decimal(14,0)) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: decimal(6,2)) + key expressions: _col0 (type: decimal(16,2)) sort order: + - Map-reduce partition columns: _col0 (type: decimal(6,2)) + Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap - LLAP IO: all inputs + LLAP IO: no inputs Map Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(4,0) + dataColumns: dec:decimal(14,0), value_dec:decimal(14,0) partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -564,3 +839,909 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_small POSTHOOK: Input: default@t2_small #### A masked pattern was here #### +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +PREHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_small + Statistics: Num rows: 1049 Data size: 223552 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,2)), value_dec (type: decimal(14,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(16,2) + bigTableValueExpressions: col 0:decimal(16,2), col 1:decimal(14,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 1096 Data size: 233717 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1096 Data size: 233717 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,2), value_dec:decimal(14,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,0)] + Map 2 + Map Operator Tree: + TableScan + alias: t2_small + Statistics: Num rows: 1049 Data size: 223552 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,0), 1:value_dec:decimal(14,0), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,0)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)), value_dec (type: decimal(14,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(16,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(16,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(14,0)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,0), value_dec:decimal(14,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_small +PREHOOK: Input: default@t2_small +#### A masked pattern was here #### +POSTHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_small +POSTHOOK: Input: default@t2_small +#### A masked pattern was here #### +14.00 33.66 14 10 +14.00 33.66 14 22 +14.00 33.66 14 34 +14.00 33.66 14 39 +14.00 33.66 14 42 +14.00 33.66 14 45 +14.00 33.66 14 46 +14.00 33.66 14 49 +14.00 33.66 14 5 +17.00 14.26 17 1 +17.00 14.26 17 14 +17.00 14.26 17 16 +17.00 14.26 17 19 +17.00 14.26 17 2 +17.00 14.26 17 22 +17.00 14.26 17 29 +17.00 14.26 17 3 +17.00 14.26 17 4 +17.00 14.26 17 44 +45.00 23.55 45 1 +45.00 23.55 45 2 +45.00 23.55 45 22 +45.00 23.55 45 24 +45.00 23.55 45 42 +6.00 29.78 6 16 +6.00 29.78 6 28 +6.00 29.78 6 30 +6.00 29.78 6 34 +6.00 29.78 6 36 +6.00 29.78 6 44 +62.00 21.02 62 15 +62.00 21.02 62 15 +62.00 21.02 62 21 +62.00 21.02 62 21 +62.00 21.02 62 22 +62.00 21.02 62 25 +62.00 21.02 62 29 +62.00 21.02 62 3 +62.00 21.02 62 34 +62.00 21.02 62 47 +62.00 21.02 62 47 +62.00 21.02 62 49 +64.00 37.76 64 0 +64.00 37.76 64 10 +64.00 37.76 64 10 +64.00 37.76 64 13 +64.00 37.76 64 23 +64.00 37.76 64 25 +64.00 37.76 64 26 +64.00 37.76 64 27 +64.00 37.76 64 27 +64.00 37.76 64 30 +64.00 37.76 64 32 +64.00 37.76 64 34 +64.00 37.76 64 35 +64.00 37.76 64 38 +64.00 37.76 64 40 +64.00 37.76 64 43 +64.00 37.76 64 5 +64.00 37.76 64 50 +70.00 24.59 70 2 +70.00 24.59 70 25 +70.00 24.59 70 27 +70.00 24.59 70 28 +70.00 24.59 70 3 +70.00 24.59 70 32 +70.00 24.59 70 44 +79.00 15.12 79 1 +79.00 15.12 79 15 +79.00 15.12 79 25 +79.00 15.12 79 30 +79.00 15.12 79 35 +79.00 15.12 79 35 +89.00 15.09 89 1 +89.00 15.09 89 15 +89.00 15.09 89 23 +89.00 15.09 89 27 +89.00 15.09 89 28 +89.00 15.09 89 29 +89.00 15.09 89 30 +89.00 15.09 89 32 +89.00 15.09 89 39 +89.00 15.09 89 40 +89.00 15.09 89 45 +89.00 15.09 89 7 +9.00 48.96 9 12 +9.00 48.96 9 15 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 20 +9.00 48.96 9 20 +9.00 48.96 9 21 +9.00 48.96 9 21 +9.00 48.96 9 26 +9.00 48.96 9 27 +9.00 48.96 9 34 +9.00 48.96 9 38 +9.00 48.96 9 41 +9.00 48.96 9 42 +9.00 48.96 9 45 +9.00 48.96 9 48 +9.00 48.96 9 49 +9.00 48.96 9 5 +9.00 48.96 9 7 +9.00 48.96 9 7 +PREHOOK: query: explain vectorization detail +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_small + Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(16,2) + bigTableValueExpressions: col 0:decimal(16,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dec:decimal(14,2), value_dec:decimal(14,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: t2_small + Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,0), 1:value_dec:decimal(14,0), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,0)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(16,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(16,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dec:decimal(14,0), value_dec:decimal(14,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_small +PREHOOK: Input: default@t2_small +#### A masked pattern was here #### +POSTHOOK: query: select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_small +POSTHOOK: Input: default@t2_small +#### A masked pattern was here #### +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +PREHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_small + Statistics: Num rows: 1049 Data size: 223552 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,2)), value_dec (type: decimal(14,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(16,2) + bigTableValueExpressions: col 0:decimal(16,2), col 1:decimal(14,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 1096 Data size: 233717 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1096 Data size: 233717 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,2), value_dec:decimal(14,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,0)] + Map 2 + Map Operator Tree: + TableScan + alias: t2_small + Statistics: Num rows: 1049 Data size: 223552 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,0), 1:value_dec:decimal(14,0), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,0)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)), value_dec (type: decimal(14,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(16,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(16,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(14,0)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,0), value_dec:decimal(14,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_small +PREHOOK: Input: default@t2_small +#### A masked pattern was here #### +POSTHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_small +POSTHOOK: Input: default@t2_small +#### A masked pattern was here #### +14.00 33.66 14 10 +14.00 33.66 14 22 +14.00 33.66 14 34 +14.00 33.66 14 39 +14.00 33.66 14 42 +14.00 33.66 14 45 +14.00 33.66 14 46 +14.00 33.66 14 49 +14.00 33.66 14 5 +17.00 14.26 17 1 +17.00 14.26 17 14 +17.00 14.26 17 16 +17.00 14.26 17 19 +17.00 14.26 17 2 +17.00 14.26 17 22 +17.00 14.26 17 29 +17.00 14.26 17 3 +17.00 14.26 17 4 +17.00 14.26 17 44 +45.00 23.55 45 1 +45.00 23.55 45 2 +45.00 23.55 45 22 +45.00 23.55 45 24 +45.00 23.55 45 42 +6.00 29.78 6 16 +6.00 29.78 6 28 +6.00 29.78 6 30 +6.00 29.78 6 34 +6.00 29.78 6 36 +6.00 29.78 6 44 +62.00 21.02 62 15 +62.00 21.02 62 15 +62.00 21.02 62 21 +62.00 21.02 62 21 +62.00 21.02 62 22 +62.00 21.02 62 25 +62.00 21.02 62 29 +62.00 21.02 62 3 +62.00 21.02 62 34 +62.00 21.02 62 47 +62.00 21.02 62 47 +62.00 21.02 62 49 +64.00 37.76 64 0 +64.00 37.76 64 10 +64.00 37.76 64 10 +64.00 37.76 64 13 +64.00 37.76 64 23 +64.00 37.76 64 25 +64.00 37.76 64 26 +64.00 37.76 64 27 +64.00 37.76 64 27 +64.00 37.76 64 30 +64.00 37.76 64 32 +64.00 37.76 64 34 +64.00 37.76 64 35 +64.00 37.76 64 38 +64.00 37.76 64 40 +64.00 37.76 64 43 +64.00 37.76 64 5 +64.00 37.76 64 50 +70.00 24.59 70 2 +70.00 24.59 70 25 +70.00 24.59 70 27 +70.00 24.59 70 28 +70.00 24.59 70 3 +70.00 24.59 70 32 +70.00 24.59 70 44 +79.00 15.12 79 1 +79.00 15.12 79 15 +79.00 15.12 79 25 +79.00 15.12 79 30 +79.00 15.12 79 35 +79.00 15.12 79 35 +89.00 15.09 89 1 +89.00 15.09 89 15 +89.00 15.09 89 23 +89.00 15.09 89 27 +89.00 15.09 89 28 +89.00 15.09 89 29 +89.00 15.09 89 30 +89.00 15.09 89 32 +89.00 15.09 89 39 +89.00 15.09 89 40 +89.00 15.09 89 45 +89.00 15.09 89 7 +9.00 48.96 9 12 +9.00 48.96 9 15 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 20 +9.00 48.96 9 20 +9.00 48.96 9 21 +9.00 48.96 9 21 +9.00 48.96 9 26 +9.00 48.96 9 27 +9.00 48.96 9 34 +9.00 48.96 9 38 +9.00 48.96 9 41 +9.00 48.96 9 42 +9.00 48.96 9 45 +9.00 48.96 9 48 +9.00 48.96 9 49 +9.00 48.96 9 5 +9.00 48.96 9 7 +9.00 48.96 9 7 diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 5c8aed8..7c34aff 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -692,6 +692,8 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out index 7f25162..916d7a4 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out @@ -311,6 +311,8 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int className: VectorMapJoinOuterFilteredOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -451,6 +453,8 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int className: VectorMapJoinOuterFilteredOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 25b8004..3f00869 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -5947,6 +5947,8 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -6147,6 +6149,8 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -6349,6 +6353,8 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -6543,6 +6549,8 @@ STAGE PLANS: 0 key (type: int) 1 _col1 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -6756,6 +6764,8 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7016,6 +7026,8 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7221,6 +7233,8 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7423,6 +7437,8 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7559,6 +7575,8 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7772,6 +7790,8 @@ STAGE PLANS: 0 key (type: int) 1 (2 * _col0) (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7973,6 +7993,8 @@ STAGE PLANS: 1 key (type: int) 2 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -8237,6 +8259,8 @@ STAGE PLANS: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:string + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -8447,6 +8471,8 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -8716,6 +8742,8 @@ STAGE PLANS: 1 key (type: int) 2 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -9983,6 +10011,8 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -9998,6 +10028,8 @@ STAGE PLANS: 0 _col1 (type: string) 1 value (type: string) Map Join Vectorization: + bigTableKeyExpressions: col 1:string + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -10272,6 +10304,8 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyExpressions: col 1:string + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -12488,6 +12522,8 @@ STAGE PLANS: 1 key (type: int) 2 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -12965,6 +13001,8 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -13234,6 +13272,8 @@ STAGE PLANS: 1 key (type: int) 2 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -17015,6 +17055,8 @@ STAGE PLANS: 1 key (type: int) 2 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -17492,6 +17534,8 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -17761,6 +17805,8 @@ STAGE PLANS: 1 key (type: int) 2 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out index 1886769..9801470 100644 --- ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out @@ -60,6 +60,8 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -195,6 +197,8 @@ STAGE PLANS: 1 value (type: int) 2 key (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -356,6 +360,8 @@ STAGE PLANS: 1 value (type: int) 2 key (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -538,6 +544,8 @@ STAGE PLANS: 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -698,6 +706,8 @@ STAGE PLANS: 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -943,6 +953,8 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -1078,6 +1090,8 @@ STAGE PLANS: 1 value (type: int) 2 key (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -1239,6 +1253,8 @@ STAGE PLANS: 1 value (type: int) 2 key (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -1421,6 +1437,8 @@ STAGE PLANS: 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -1581,6 +1599,8 @@ STAGE PLANS: 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:int + bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out index 51994da..3232736 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out @@ -38,40 +38,42 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC +PREHOOK: query: CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t1 -POSTHOOK: query: CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k +PREHOOK: query: INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k PREHOOK: type: QUERY PREHOOK: Input: default@over1k PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k +POSTHOOK: query: INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t1 POSTHOOK: Lineage: t1.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(20,2), comment:null), ] -PREHOOK: query: CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC +POSTHOOK: Lineage: t1.value_dec EXPRESSION [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] +PREHOOK: query: CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t2 -POSTHOOK: query: CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t2 -PREHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k +PREHOOK: query: INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k PREHOOK: type: QUERY PREHOOK: Input: default@over1k PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k +POSTHOOK: query: INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(20,2), comment:null), ] +POSTHOOK: Lineage: t2.value_dec EXPRESSION [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] PREHOOK: query: explain vectorization detail select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`) PREHOOK: type: QUERY @@ -96,17 +98,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:value_dec:decimal(24,0), 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: SelectColumnIsNotNull(col 0:decimal(24,0)) predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(24,0)) outputColumnNames: _col0 @@ -114,7 +116,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -133,9 +135,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(24,0) + dataColumns: dec:decimal(24,0), value_dec:decimal(24,0) partitionColumnCount: 0 scratchColumnTypeNames: [] Local Work: @@ -149,17 +151,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:value_dec:decimal(22,2), 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: SelectColumnIsNotNull(col 0:decimal(22,2)) predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(22,2)) outputColumnNames: _col0 @@ -167,7 +169,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -175,6 +177,8 @@ STAGE PLANS: 0 _col0 (type: decimal(26,2)) 1 _col0 (type: decimal(26,2)) Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(26,2) + bigTableValueExpressions: col 0:decimal(26,2) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true @@ -183,13 +187,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -205,9 +209,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(22,2) + dataColumns: dec:decimal(22,2), value_dec:decimal(22,2) partitionColumnCount: 0 scratchColumnTypeNames: [] Local Work: @@ -335,6 +339,271 @@ POSTHOOK: Input: default@t2 9.00 9 9.00 9 9.00 9 +PREHOOK: query: explain vectorization detail +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:value_dec:decimal(24,0), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(24,0)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(24,0)), value_dec (type: decimal(24,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true + keys: + 0 _col0 (type: decimal(26,2)) + 1 _col0 (type: decimal(26,2)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(24,0), value_dec:decimal(24,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:value_dec:decimal(22,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(22,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(22,2)), value_dec (type: decimal(22,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(26,2)) + 1 _col0 (type: decimal(26,2)) + Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(26,2) + bigTableValueExpressions: col 0:decimal(26,2), col 1:decimal(22,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(22,2), value_dec:decimal(22,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(24,0)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +14.00 33.66 14 10 +14.00 33.66 14 22 +14.00 33.66 14 34 +14.00 33.66 14 39 +14.00 33.66 14 42 +14.00 33.66 14 45 +14.00 33.66 14 46 +14.00 33.66 14 49 +14.00 33.66 14 5 +17.00 14.26 17 1 +17.00 14.26 17 14 +17.00 14.26 17 16 +17.00 14.26 17 19 +17.00 14.26 17 2 +17.00 14.26 17 22 +17.00 14.26 17 29 +17.00 14.26 17 3 +17.00 14.26 17 4 +17.00 14.26 17 44 +45.00 23.55 45 1 +45.00 23.55 45 2 +45.00 23.55 45 22 +45.00 23.55 45 24 +45.00 23.55 45 42 +6.00 29.78 6 16 +6.00 29.78 6 28 +6.00 29.78 6 30 +6.00 29.78 6 34 +6.00 29.78 6 36 +6.00 29.78 6 44 +62.00 21.02 62 15 +62.00 21.02 62 15 +62.00 21.02 62 21 +62.00 21.02 62 21 +62.00 21.02 62 22 +62.00 21.02 62 25 +62.00 21.02 62 29 +62.00 21.02 62 3 +62.00 21.02 62 34 +62.00 21.02 62 47 +62.00 21.02 62 47 +62.00 21.02 62 49 +64.00 37.76 64 0 +64.00 37.76 64 10 +64.00 37.76 64 10 +64.00 37.76 64 13 +64.00 37.76 64 23 +64.00 37.76 64 25 +64.00 37.76 64 26 +64.00 37.76 64 27 +64.00 37.76 64 27 +64.00 37.76 64 30 +64.00 37.76 64 32 +64.00 37.76 64 34 +64.00 37.76 64 35 +64.00 37.76 64 38 +64.00 37.76 64 40 +64.00 37.76 64 43 +64.00 37.76 64 5 +64.00 37.76 64 50 +70.00 24.59 70 2 +70.00 24.59 70 25 +70.00 24.59 70 27 +70.00 24.59 70 28 +70.00 24.59 70 3 +70.00 24.59 70 32 +70.00 24.59 70 44 +79.00 15.12 79 1 +79.00 15.12 79 15 +79.00 15.12 79 25 +79.00 15.12 79 30 +79.00 15.12 79 35 +79.00 15.12 79 35 +89.00 15.09 89 1 +89.00 15.09 89 15 +89.00 15.09 89 23 +89.00 15.09 89 27 +89.00 15.09 89 28 +89.00 15.09 89 29 +89.00 15.09 89 30 +89.00 15.09 89 32 +89.00 15.09 89 39 +89.00 15.09 89 40 +89.00 15.09 89 45 +89.00 15.09 89 7 +9.00 48.96 9 12 +9.00 48.96 9 15 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 20 +9.00 48.96 9 20 +9.00 48.96 9 21 +9.00 48.96 9 21 +9.00 48.96 9 26 +9.00 48.96 9 27 +9.00 48.96 9 34 +9.00 48.96 9 38 +9.00 48.96 9 41 +9.00 48.96 9 42 +9.00 48.96 9 45 +9.00 48.96 9 48 +9.00 48.96 9 49 +9.00 48.96 9 5 +9.00 48.96 9 7 +9.00 48.96 9 7 PREHOOK: query: CREATE TABLE over1k_small(t tinyint, si smallint, i int, @@ -344,7 +613,7 @@ PREHOOK: query: CREATE TABLE over1k_small(t tinyint, bo boolean, s string, ts timestamp, - `dec` decimal(4,2), + `dec` decimal(14,2), bin binary) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE @@ -360,7 +629,7 @@ POSTHOOK: query: CREATE TABLE over1k_small(t tinyint, bo boolean, s string, ts timestamp, - `dec` decimal(4,2), + `dec` decimal(14,2), bin binary) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE @@ -375,40 +644,42 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@over1k_small -PREHOOK: query: CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC +PREHOOK: query: CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t1_small -POSTHOOK: query: CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t1_small -PREHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k_small +PREHOOK: query: INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small PREHOOK: type: QUERY PREHOOK: Input: default@over1k_small -PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k_small +PREHOOK: Output: default@t1_small +POSTHOOK: query: INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_small -POSTHOOK: Output: default@t1 -POSTHOOK: Lineage: t1.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -PREHOOK: query: CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC +POSTHOOK: Output: default@t1_small +POSTHOOK: Lineage: t1_small.dec SIMPLE [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(14,2), comment:null), ] +POSTHOOK: Lineage: t1_small.value_dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:d, type:double, comment:null), ] +PREHOOK: query: CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t2_small -POSTHOOK: query: CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t2_small -PREHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k_small +PREHOOK: query: INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small PREHOOK: type: QUERY PREHOOK: Input: default@over1k_small -PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k_small +PREHOOK: Output: default@t2_small +POSTHOOK: query: INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_small -POSTHOOK: Output: default@t2 -POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Output: default@t2_small +POSTHOOK: Lineage: t2_small.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(14,2), comment:null), ] +POSTHOOK: Lineage: t2_small.value_dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:d, type:double, comment:null), ] PREHOOK: query: explain vectorization detail select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) PREHOOK: type: QUERY @@ -433,46 +704,367 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2_small - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(4,0), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(14,0)/DECIMAL_64, 1:value_dec:decimal(14,0)/DECIMAL_64, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,0)) + predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,0))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,0)/DECIMAL_64) -> 3:decimal(14,0)) predicate: dec is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(4,0)) + expressions: dec (type: decimal(14,0)) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator native: true keys: - 0 _col0 (type: decimal(6,2)) - 1 _col0 (type: decimal(6,2)) + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) Execution mode: vectorized Map Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dec:decimal(14,0)/DECIMAL_64, value_dec:decimal(14,0)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,0)] + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_small + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,2)/DECIMAL_64, 1:value_dec:decimal(14,2)/DECIMAL_64, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,2))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,2)/DECIMAL_64) -> 3:decimal(14,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Map Join Vectorization: + bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2) + bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 5:decimal(16,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dec:decimal(14,2)/DECIMAL_64, value_dec:decimal(14,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,2), decimal(16,2), decimal(16,2)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_small +PREHOOK: Input: default@t2_small +#### A masked pattern was here #### +POSTHOOK: query: select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_small +POSTHOOK: Input: default@t2_small +#### A masked pattern was here #### +89.00 89 +PREHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: t2_small + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,0)/DECIMAL_64, 1:value_dec:decimal(14,0)/DECIMAL_64, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,0))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,0)/DECIMAL_64) -> 3:decimal(14,0)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)), value_dec (type: decimal(14,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,0)/DECIMAL_64, value_dec:decimal(14,0)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,0)] + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_small + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,2)/DECIMAL_64, 1:value_dec:decimal(14,2)/DECIMAL_64, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,2))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,2)/DECIMAL_64) -> 3:decimal(14,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,2)), value_dec (type: decimal(14,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Map Join Vectorization: + bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2) + bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 5:decimal(16,2), ConvertDecimal64ToDecimal(col 1:decimal(14,2)/DECIMAL_64) -> 3:decimal(14,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,2)/DECIMAL_64, value_dec:decimal(14,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,2), decimal(16,2), decimal(16,2), decimal(14,0)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_small +PREHOOK: Input: default@t2_small +#### A masked pattern was here #### +POSTHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_small +POSTHOOK: Input: default@t2_small +#### A masked pattern was here #### +89.00 15.09 89 15 +PREHOOK: query: explain vectorization detail +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: t2_small + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,0), 1:value_dec:decimal(14,0), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,0)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(4,0) + dataColumns: dec:decimal(14,0), value_dec:decimal(14,0) partitionColumnCount: 0 scratchColumnTypeNames: [] Local Work: @@ -486,32 +1078,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1_small - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(4,2), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,2)) + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2)) predicate: dec is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(4,2)) + expressions: dec (type: decimal(14,2)) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: decimal(6,2)) - 1 _col0 (type: decimal(6,2)) + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(16,2) + bigTableValueExpressions: col 0:decimal(16,2) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true @@ -520,13 +1114,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -534,17 +1128,18 @@ STAGE PLANS: Execution mode: vectorized Map Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(4,2) + dataColumns: dec:decimal(14,2), value_dec:decimal(14,2) partitionColumnCount: 0 scratchColumnTypeNames: [] Local Work: @@ -566,3 +1161,166 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_small POSTHOOK: Input: default@t2_small #### A masked pattern was here #### +89.00 89 +PREHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: t2_small + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,0), 1:value_dec:decimal(14,0), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,0)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)), value_dec (type: decimal(14,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,0), value_dec:decimal(14,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_small + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,2)), value_dec (type: decimal(14,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(16,2) + bigTableValueExpressions: col 0:decimal(16,2), col 1:decimal(14,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,2), value_dec:decimal(14,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,0)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_small +PREHOOK: Input: default@t2_small +#### A masked pattern was here #### +POSTHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_small +POSTHOOK: Input: default@t2_small +#### A masked pattern was here #### +89.00 15.09 89 15 diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out index bafcc70..c65ef71 100644 --- ql/src/test/results/clientpositive/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -121,6 +121,7 @@ STAGE PLANS: 0 1 Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:smallint className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -263,6 +264,7 @@ STAGE PLANS: 0 1 Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:smallint className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index 533b729..3f9e90b 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -169,6 +169,8 @@ STAGE PLANS: 0 _col10 (type: binary) 1 _col10 (type: binary) Map Join Vectorization: + bigTableKeyExpressions: col 10:binary + bigTableValueExpressions: col 0:tinyint, col 1:smallint, col 2:int, col 3:bigint, col 4:float, col 5:double, col 6:boolean, col 7:string, col 8:timestamp, col 9:decimal(4,2), col 10:binary className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -550,6 +552,8 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 2:int + bigTableValueExpressions: col 10:binary className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out index 9238a8d..c98bb44 100644 --- ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out +++ ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out @@ -191,6 +191,8 @@ STAGE PLANS: 0 _col1 (type: char(10)) 1 _col1 (type: char(10)) Map Join Vectorization: + bigTableKeyExpressions: col 1:char(10) + bigTableValueExpressions: col 0:int, col 1:char(10) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -322,6 +324,8 @@ STAGE PLANS: 0 _col1 (type: char(20)) 1 _col1 (type: char(20)) Map Join Vectorization: + bigTableKeyExpressions: col 1:char(20) + bigTableValueExpressions: col 0:int, col 1:char(20) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -455,6 +459,8 @@ STAGE PLANS: 0 UDFToString(_col1) (type: string) 1 _col1 (type: string) Map Join Vectorization: + bigTableKeyExpressions: CastStringGroupToString(col 1:char(10)) -> 3:string + bigTableValueExpressions: col 0:int, col 1:char(10) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_complex_join.q.out ql/src/test/results/clientpositive/vector_complex_join.q.out index f8501f6..23ae87e 100644 --- ql/src/test/results/clientpositive/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/vector_complex_join.q.out @@ -87,6 +87,8 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 2:int + bigTableValueExpressions: col 0:tinyint, col 1:smallint, col 2:int, col 3:bigint, col 4:float, col 5:double, col 6:string, col 7:string, col 8:timestamp, col 9:timestamp, col 10:boolean, col 11:boolean className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out index 39ee51c..6fb0c34 100644 --- ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out @@ -38,40 +38,42 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC +PREHOOK: query: CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t1 -POSTHOOK: query: CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k +PREHOOK: query: INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k PREHOOK: type: QUERY PREHOOK: Input: default@over1k PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k +POSTHOOK: query: INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t1 POSTHOOK: Lineage: t1.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(20,2), comment:null), ] -PREHOOK: query: CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC +POSTHOOK: Lineage: t1.value_dec EXPRESSION [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] +PREHOOK: query: CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t2 -POSTHOOK: query: CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t2 -PREHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k +PREHOOK: query: INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k PREHOOK: type: QUERY PREHOOK: Input: default@over1k PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k +POSTHOOK: query: INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(20,2), comment:null), ] +POSTHOOK: Lineage: t2.value_dec EXPRESSION [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] PREHOOK: query: explain vectorization detail select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`) PREHOOK: type: QUERY @@ -98,14 +100,14 @@ STAGE PLANS: $hdt$_1:t2 TableScan alias: t2 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(24,0)) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: decimal(26,2)) @@ -116,17 +118,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:value_dec:decimal(22,2), 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: SelectColumnIsNotNull(col 0:decimal(22,2)) predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dec (type: decimal(22,2)) outputColumnNames: _col0 @@ -134,7 +136,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -142,19 +144,21 @@ STAGE PLANS: 0 _col0 (type: decimal(26,2)) 1 _col0 (type: decimal(26,2)) Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(26,2) + bigTableValueExpressions: col 0:decimal(26,2) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false nativeNotSupportedKeyTypes: DECIMAL outputColumnNames: _col0, _col1 - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -170,9 +174,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(22,2) + dataColumns: dec:decimal(22,2), value_dec:decimal(22,2) partitionColumnCount: 0 scratchColumnTypeNames: [] Local Work: @@ -300,6 +304,236 @@ POSTHOOK: Input: default@t2 9.00 9 9.00 9 9.00 9 +PREHOOK: query: explain vectorization detail +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:t2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:t2 + TableScan + alias: t2 + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(24,0)), value_dec (type: decimal(24,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: decimal(26,2)) + 1 _col0 (type: decimal(26,2)) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:value_dec:decimal(22,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(22,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(22,2)), value_dec (type: decimal(22,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(26,2)) + 1 _col0 (type: decimal(26,2)) + Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(26,2) + bigTableValueExpressions: col 0:decimal(26,2), col 1:decimal(22,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(22,2), value_dec:decimal(22,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(24,0)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +14.00 33.66 14 10 +14.00 33.66 14 22 +14.00 33.66 14 34 +14.00 33.66 14 39 +14.00 33.66 14 42 +14.00 33.66 14 45 +14.00 33.66 14 46 +14.00 33.66 14 49 +14.00 33.66 14 5 +17.00 14.26 17 1 +17.00 14.26 17 14 +17.00 14.26 17 16 +17.00 14.26 17 19 +17.00 14.26 17 2 +17.00 14.26 17 22 +17.00 14.26 17 29 +17.00 14.26 17 3 +17.00 14.26 17 4 +17.00 14.26 17 44 +45.00 23.55 45 1 +45.00 23.55 45 2 +45.00 23.55 45 22 +45.00 23.55 45 24 +45.00 23.55 45 42 +6.00 29.78 6 16 +6.00 29.78 6 28 +6.00 29.78 6 30 +6.00 29.78 6 34 +6.00 29.78 6 36 +6.00 29.78 6 44 +62.00 21.02 62 15 +62.00 21.02 62 15 +62.00 21.02 62 21 +62.00 21.02 62 21 +62.00 21.02 62 22 +62.00 21.02 62 25 +62.00 21.02 62 29 +62.00 21.02 62 3 +62.00 21.02 62 34 +62.00 21.02 62 47 +62.00 21.02 62 47 +62.00 21.02 62 49 +64.00 37.76 64 0 +64.00 37.76 64 10 +64.00 37.76 64 10 +64.00 37.76 64 13 +64.00 37.76 64 23 +64.00 37.76 64 25 +64.00 37.76 64 26 +64.00 37.76 64 27 +64.00 37.76 64 27 +64.00 37.76 64 30 +64.00 37.76 64 32 +64.00 37.76 64 34 +64.00 37.76 64 35 +64.00 37.76 64 38 +64.00 37.76 64 40 +64.00 37.76 64 43 +64.00 37.76 64 5 +64.00 37.76 64 50 +70.00 24.59 70 2 +70.00 24.59 70 25 +70.00 24.59 70 27 +70.00 24.59 70 28 +70.00 24.59 70 3 +70.00 24.59 70 32 +70.00 24.59 70 44 +79.00 15.12 79 1 +79.00 15.12 79 15 +79.00 15.12 79 25 +79.00 15.12 79 30 +79.00 15.12 79 35 +79.00 15.12 79 35 +89.00 15.09 89 1 +89.00 15.09 89 15 +89.00 15.09 89 23 +89.00 15.09 89 27 +89.00 15.09 89 28 +89.00 15.09 89 29 +89.00 15.09 89 30 +89.00 15.09 89 32 +89.00 15.09 89 39 +89.00 15.09 89 40 +89.00 15.09 89 45 +89.00 15.09 89 7 +9.00 48.96 9 12 +9.00 48.96 9 15 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 2 +9.00 48.96 9 20 +9.00 48.96 9 20 +9.00 48.96 9 21 +9.00 48.96 9 21 +9.00 48.96 9 26 +9.00 48.96 9 27 +9.00 48.96 9 34 +9.00 48.96 9 38 +9.00 48.96 9 41 +9.00 48.96 9 42 +9.00 48.96 9 45 +9.00 48.96 9 48 +9.00 48.96 9 49 +9.00 48.96 9 5 +9.00 48.96 9 7 +9.00 48.96 9 7 PREHOOK: query: CREATE TABLE over1k_small(t tinyint, si smallint, i int, @@ -309,7 +543,7 @@ PREHOOK: query: CREATE TABLE over1k_small(t tinyint, bo boolean, s string, ts timestamp, - `dec` decimal(4,2), + `dec` decimal(14,2), bin binary) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE @@ -325,7 +559,7 @@ POSTHOOK: query: CREATE TABLE over1k_small(t tinyint, bo boolean, s string, ts timestamp, - `dec` decimal(4,2), + `dec` decimal(14,2), bin binary) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE @@ -340,40 +574,42 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@over1k_small -PREHOOK: query: CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC +PREHOOK: query: CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t1_small -POSTHOOK: query: CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t1_small -PREHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k_small +PREHOOK: query: INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small PREHOOK: type: QUERY PREHOOK: Input: default@over1k_small -PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k_small +PREHOOK: Output: default@t1_small +POSTHOOK: query: INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_small -POSTHOOK: Output: default@t1 -POSTHOOK: Lineage: t1.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -PREHOOK: query: CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC +POSTHOOK: Output: default@t1_small +POSTHOOK: Lineage: t1_small.dec SIMPLE [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(14,2), comment:null), ] +POSTHOOK: Lineage: t1_small.value_dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:d, type:double, comment:null), ] +PREHOOK: query: CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t2_small -POSTHOOK: query: CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC +POSTHOOK: query: CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t2_small -PREHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k_small +PREHOOK: query: INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small PREHOOK: type: QUERY PREHOOK: Input: default@over1k_small -PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k_small +PREHOOK: Output: default@t2_small +POSTHOOK: query: INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_small -POSTHOOK: Output: default@t2 -POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Output: default@t2_small +POSTHOOK: Lineage: t2_small.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(14,2), comment:null), ] +POSTHOOK: Lineage: t2_small.value_dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:d, type:double, comment:null), ] PREHOOK: query: explain vectorization detail select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) PREHOOK: type: QUERY @@ -393,70 +629,322 @@ STAGE PLANS: Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:t1_small + $hdt$_1:t2_small Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:t1_small + $hdt$_1:t2_small + TableScan + alias: t2_small + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)) + outputColumnNames: _col0 + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: TableScan alias: t1_small - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,2)/DECIMAL_64, 1:value_dec:decimal(14,2)/DECIMAL_64, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,2))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,2)/DECIMAL_64) -> 3:decimal(14,2)) predicate: dec is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(4,2)) + expressions: dec (type: decimal(14,2)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Map Join Vectorization: + bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2) + bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 5:decimal(16,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dec:decimal(14,2)/DECIMAL_64, value_dec:decimal(14,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,2), decimal(16,2), decimal(16,2)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_small +PREHOOK: Input: default@t2_small +#### A masked pattern was here #### +POSTHOOK: query: select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_small +POSTHOOK: Input: default@t2_small +#### A masked pattern was here #### +89.00 89 +PREHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:t2_small + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:t2_small + TableScan + alias: t2_small + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)), value_dec (type: decimal(14,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: decimal(6,2)) - 1 _col0 (type: decimal(6,2)) + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + alias: t1_small + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,2)/DECIMAL_64, 1:value_dec:decimal(14,2)/DECIMAL_64, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,2))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,2)/DECIMAL_64) -> 3:decimal(14,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,2)), value_dec (type: decimal(14,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Map Join Vectorization: + bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2) + bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 5:decimal(16,2), ConvertDecimal64ToDecimal(col 1:decimal(14,2)/DECIMAL_64) -> 3:decimal(14,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,2)/DECIMAL_64, value_dec:decimal(14,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,2), decimal(16,2), decimal(16,2), decimal(14,0)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_small +PREHOOK: Input: default@t2_small +#### A masked pattern was here #### +POSTHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_small +POSTHOOK: Input: default@t2_small +#### A masked pattern was here #### +89.00 15.09 89 15 +PREHOOK: query: explain vectorization detail +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:t2_small + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:t2_small + TableScan alias: t2_small - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)) + outputColumnNames: _col0 + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_small + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:dec:decimal(4,0), 1:ROW__ID:struct] + vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,0)) + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2)) predicate: dec is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(4,0)) + expressions: dec (type: decimal(14,2)) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: decimal(6,2)) - 1 _col0 (type: decimal(6,2)) + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(16,2) + bigTableValueExpressions: col 0:decimal(16,2) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false nativeNotSupportedKeyTypes: DECIMAL outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -464,17 +952,18 @@ STAGE PLANS: Execution mode: vectorized Map Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 + dataColumnCount: 2 includeColumns: [0] - dataColumns: dec:decimal(4,0) + dataColumns: dec:decimal(14,2), value_dec:decimal(14,2) partitionColumnCount: 0 scratchColumnTypeNames: [] Local Work: @@ -496,3 +985,130 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_small POSTHOOK: Input: default@t2_small #### A masked pattern was here #### +89.00 89 +PREHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:t2_small + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:t2_small + TableScan + alias: t2_small + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,0)), value_dec (type: decimal(14,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_small + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2)) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(14,2)), value_dec (type: decimal(14,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(16,2)) + 1 _col0 (type: decimal(16,2)) + Map Join Vectorization: + bigTableKeyExpressions: col 0:decimal(16,2) + bigTableValueExpressions: col 0:decimal(16,2), col 1:decimal(14,2) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false + nativeNotSupportedKeyTypes: DECIMAL + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dec:decimal(14,2), value_dec:decimal(14,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(14,0)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_small +PREHOOK: Input: default@t2_small +#### A masked pattern was here #### +POSTHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_small +POSTHOOK: Input: default@t2_small +#### A masked pattern was here #### +89.00 15.09 89 15 diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 553c6f4..bfe2d80 100644 --- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -127,6 +127,7 @@ STAGE PLANS: 0 1 Map Join Vectorization: + bigTableValueExpressions: col 0:bigint, col 1:bigint className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -185,6 +186,8 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string, col 1:string, col 2:bigint, col 3:bigint className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_include_no_sel.q.out ql/src/test/results/clientpositive/vector_include_no_sel.q.out index 7efcfd9..848823f 100644 --- ql/src/test/results/clientpositive/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/vector_include_no_sel.q.out @@ -214,6 +214,7 @@ STAGE PLANS: 0 1 Map Join Vectorization: + bigTableValueExpressions: col 4:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out index 18b324c..1654bd9 100644 --- ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out @@ -244,6 +244,8 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: interval_day_time) 1 _col0 (type: string), _col1 (type: interval_day_time) Map Join Vectorization: + bigTableKeyExpressions: col 8:string, col 15:interval_day_time + bigTableValueExpressions: col 8:string, col 15:interval_day_time className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out index d73e9ac..979477b 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out @@ -327,6 +327,8 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int className: VectorMapJoinOuterFilteredOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -448,6 +450,8 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int className: VectorMapJoinOuterFilteredOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -569,6 +573,8 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int className: VectorMapJoinOuterFilteredOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -690,6 +696,8 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int className: VectorMapJoinOuterFilteredOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_outer_join0.q.out ql/src/test/results/clientpositive/vector_outer_join0.q.out index 11a534d..603571d 100644 --- ql/src/test/results/clientpositive/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/vector_outer_join0.q.out @@ -118,6 +118,8 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:string, col 1:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -235,6 +237,8 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out index 6dadcc6..9143fc8 100644 --- ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -282,6 +282,8 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 2:int + bigTableValueExpressions: col 0:tinyint, col 1:smallint, col 2:int, col 3:bigint, col 4:float, col 5:double, col 6:string, col 7:string, col 8:timestamp, col 9:timestamp, col 10:boolean, col 11:boolean className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -422,6 +424,8 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: + bigTableKeyExpressions: col 0:tinyint + bigTableValueExpressions: col 0:tinyint className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -669,6 +673,8 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 2:int + bigTableValueExpressions: col 0:tinyint className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -682,6 +688,8 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: + bigTableKeyExpressions: col 0:tinyint + bigTableValueExpressions: col 0:tinyint className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out index bffc709..2fe0de3 100644 --- ql/src/test/results/clientpositive/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_outer_join2.q.out @@ -313,6 +313,8 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 2:int + bigTableValueExpressions: col 3:bigint className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -326,6 +328,8 @@ STAGE PLANS: 0 _col1 (type: bigint) 1 _col0 (type: bigint) Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vector_outer_join3.q.out ql/src/test/results/clientpositive/vector_outer_join3.q.out index b8a4102..803f962 100644 --- ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -244,7 +244,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"Column[cint]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"Column[cstring1]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"Column[cint]","_col1":"Column[cstring1]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:Column[_col1]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"Column[_col0]"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"Column[cint]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"Column[cstring1]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"Column[cint]","_col1":"Column[cstring1]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:Column[_col1]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"Column[_col0]"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -284,7 +284,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"Column[cstring2]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"Column[cstring1]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"Column[cstring1]","_col1":"Column[cstring2]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"Column[_col0]"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"Column[cstring2]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"Column[cstring1]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"Column[cstring1]","_col1":"Column[cstring2]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"Column[_col0]"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -324,7 +324,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"Column[cbigint]","_col1":"Column[cstring2]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"Column[cint]","_col1":"Column[cstring1]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"Column[cint]","_col1":"Column[cbigint]","_col2":"Column[cstring1]","_col3":"Column[cstring2]"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col2":"0:Column[_col2]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"Column[_col0]"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"Column[cbigint]","_col1":"Column[cstring2]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"Column[cint]","_col1":"Column[cstring1]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"Column[cint]","_col1":"Column[cbigint]","_col2":"Column[cstring1]","_col3":"Column[cstring2]"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col2":"0:Column[_col2]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"Column[_col0]"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index c68fa12..f93ccc8 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -258,7 +258,7 @@ from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"Column[ctinyint]","_col1":"Column[csmallint]","_col10":"Column[cboolean1]","_col11":"Column[cboolean2]","_col2":"Column[cint]","_col3":"Column[cbigint]","_col4":"Column[cfloat]","_col5":"Column[cdouble]","_col6":"Column[cstring1]","_col7":"Column[cstring2]","_col8":"Column[ctimestamp1]","_col9":"Column[ctimestamp2]"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"Column[ctinyint]","_col1":"Column[csmallint]","_col10":"Column[cboolean1]","_col11":"Column[cboolean2]","_col2":"Column[cint]","_col3":"Column[cbigint]","_col4":"Column[cfloat]","_col5":"Column[cdouble]","_col6":"Column[cstring1]","_col7":"Column[cstring2]","_col8":"Column[ctimestamp1]","_col9":"Column[ctimestamp2]"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]","_col10":"0:Column[_col10]","_col11":"0:Column[_col11]","_col12":"1:Column[_col0]","_col13":"1:Column[_col1]","_col14":"1:Column[_col2]","_col15":"1:Column[_col3]","_col16":"1:Column[_col4]","_col17":"1:Column[_col5]","_col18":"1:Column[_col6]","_col19":"1:Column[_col7]","_col2":"0:Column[_col2]","_col20":"1:Column[_col8]","_col21":"1:Column[_col9]","_col22":"1:Column[_col10]","_col23":"1:Column[_col11]","_col3":"0:Column[_col3]","_col4":"0:Column[_col4]","_col5":"0:Column[_col5]","_col6":"0:Column[_col6]","_col7":"0:Column[_col7]","_col8":"0:Column[_col8]","_col9":"0:Column[_col9]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint, bigint, bigint, double, double, string, string, timestamp, timestamp, bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"Column[ctinyint]","_col1":"Column[csmallint]","_col10":"Column[cboolean1]","_col11":"Column[cboolean2]","_col2":"Column[cint]","_col3":"Column[cbigint]","_col4":"Column[cfloat]","_col5":"Column[cdouble]","_col6":"Column[cstring1]","_col7":"Column[cstring2]","_col8":"Column[ctimestamp1]","_col9":"Column[ctimestamp2]"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"Column[ctinyint]","_col1":"Column[csmallint]","_col10":"Column[cboolean1]","_col11":"Column[cboolean2]","_col2":"Column[cint]","_col3":"Column[cbigint]","_col4":"Column[cfloat]","_col5":"Column[cdouble]","_col6":"Column[cstring1]","_col7":"Column[cstring2]","_col8":"Column[ctimestamp1]","_col9":"Column[ctimestamp2]"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]","_col10":"0:Column[_col10]","_col11":"0:Column[_col11]","_col12":"1:Column[_col0]","_col13":"1:Column[_col1]","_col14":"1:Column[_col2]","_col15":"1:Column[_col3]","_col16":"1:Column[_col4]","_col17":"1:Column[_col5]","_col18":"1:Column[_col6]","_col19":"1:Column[_col7]","_col2":"0:Column[_col2]","_col20":"1:Column[_col8]","_col21":"1:Column[_col9]","_col22":"1:Column[_col10]","_col23":"1:Column[_col11]","_col3":"0:Column[_col3]","_col4":"0:Column[_col4]","_col5":"0:Column[_col5]","_col6":"0:Column[_col6]","_col7":"0:Column[_col7]","_col8":"0:Column[_col8]","_col9":"0:Column[_col9]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint","col 1:smallint","col 2:int","col 3:bigint","col 4:float","col 5:double","col 6:string","col 7:string","col 8:timestamp","col 9:timestamp","col 10:boolean","col 11:boolean"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint, bigint, bigint, double, double, string, string, timestamp, timestamp, bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}} PREHOOK: query: select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -339,7 +339,7 @@ from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"Column[ctinyint]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"Column[ctinyint]"},"outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"Column[ctinyint]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"Column[ctinyint]"},"outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}} PREHOOK: query: select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd @@ -782,7 +782,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"Column[cint]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"Column[ctinyint]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"Column[ctinyint]","_col1":"Column[cint]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"Column[_col0]"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"Column[cint]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"Column[ctinyint]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"Column[ctinyint]","_col1":"Column[cint]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"Column[_col0]"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git ql/src/test/results/clientpositive/vector_outer_join6.q.out ql/src/test/results/clientpositive/vector_outer_join6.q.out index 474a9dc..91f04c6 100644 --- ql/src/test/results/clientpositive/vector_outer_join6.q.out +++ ql/src/test/results/clientpositive/vector_outer_join6.q.out @@ -130,7 +130,7 @@ POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_0:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_0:tjoin2":{"TableScan":{"alias:":"tjoin2","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin2","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_1","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_21"}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_8","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_9","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_19"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_23","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]","_col2":"1:Column[_col0]"},"condition map:":[{"":"Right Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col2 (type: int), _col0 (type: int), _col1 (type: int)","columnExprMap:":{"_col0":"Column[_col2]","_col1":"Column[_col0]","_col2":"Column[_col1]"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 0, 1]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_25","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]","_col3":"1:Column[_col0]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col3"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_26","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col1 (type: int), _col3 (type: int)","columnExprMap:":{"_col0":"Column[_col0]","_col1":"Column[_col1]","_col2":"Column[_col3]"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_27","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_28"}}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_29"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_0:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_0:tjoin2":{"TableScan":{"alias:":"tjoin2","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin2","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_1","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_21"}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_8","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_9","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_19"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_23","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]","_col2":"1:Column[_col0]"},"condition map:":[{"":"Right Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col2 (type: int), _col0 (type: int), _col1 (type: int)","columnExprMap:":{"_col0":"Column[_col2]","_col1":"Column[_col0]","_col2":"Column[_col1]"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 0, 1]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_25","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]","_col3":"1:Column[_col0]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 2:int","col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col3"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_26","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col1 (type: int), _col3 (type: int)","columnExprMap:":{"_col0":"Column[_col0]","_col1":"Column[_col1]","_col2":"Column[_col3]"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_27","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_28"}}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_29"}}}}}} PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY @@ -157,7 +157,7 @@ POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_0:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_0:tjoin2":{"TableScan":{"alias:":"tjoin2","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin2","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_1","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_21"}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_8","children":{"Select Operator":{"expressions:":"c1 (type: int)","columnExprMap:":{"_col0":"Column[c1]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_9","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_19"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_23","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]","_col2":"1:Column[_col0]"},"condition map:":[{"":"Right Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col2 (type: int), _col0 (type: int), _col1 (type: int)","columnExprMap:":{"_col0":"Column[_col2]","_col1":"Column[_col0]","_col2":"Column[_col1]"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 0, 1]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_25","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_26","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_27"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_28"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_0:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_0:tjoin2":{"TableScan":{"alias:":"tjoin2","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin2","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_1","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_21"}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_8","children":{"Select Operator":{"expressions:":"c1 (type: int)","columnExprMap:":{"_col0":"Column[c1]"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_9","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_19"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"Column[rnum]","_col1":"Column[c1]"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_23","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]","_col2":"1:Column[_col0]"},"condition map:":[{"":"Right Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col2 (type: int), _col0 (type: int), _col1 (type: int)","columnExprMap:":{"_col0":"Column[_col2]","_col1":"Column[_col0]","_col2":"Column[_col1]"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 0, 1]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_25","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:Column[_col0]","_col1":"0:Column[_col1]"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 2:int","col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_26","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_27"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_28"}}}}}} PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out index cc70cef..fe68e5c 100644 --- ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out @@ -465,7 +465,7 @@ STAGE PLANS: aggregators: VectorUDAFSumDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> decimal(25,2) className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:decimal(15,2)/DECIMAL_64, col 1:decimal(15,2)/DECIMAL_64 + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> 3:decimal(15,2), ConvertDecimal64ToDecimal(col 1:decimal(15,2)/DECIMAL_64) -> 4:decimal(15,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -499,7 +499,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: c1:decimal(15,2)/DECIMAL_64, c2:decimal(15,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [decimal(15,2), decimal(15,2)] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -1532,7 +1532,7 @@ STAGE PLANS: aggregators: VectorUDAFSumDecimal64(col 0:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:decimal(7,2)/DECIMAL_64, col 1:decimal(7,2)/DECIMAL_64 + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(7,2)/DECIMAL_64) -> 3:decimal(7,2), ConvertDecimal64ToDecimal(col 1:decimal(7,2)/DECIMAL_64) -> 4:decimal(7,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -1566,7 +1566,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: c1:decimal(7,2)/DECIMAL_64, c2:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [decimal(7,2), decimal(7,2)] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index 02ada3e..a6fee45 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -69,6 +69,8 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 2:int + bigTableValueExpressions: col 2:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true diff --git ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out index 6d81046..3f79d2b 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out @@ -97,6 +97,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true