commit ac6923221489d97a66ad1ac325f24268afad5c48
Author: Ivan Suller
Date:   Tue Jul 2 16:05:52 2019 +0200

    HIVE-21944

    Change-Id: Ia9e1e39e057ff71be00c287311c46870a9e040d4

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 52e8dcb090..7d7b90c028 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -102,9 +102,6 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -310,9 +307,9 @@
     };
   }

-  boolean isVectorizationEnabled;
+  private boolean isVectorizationEnabled;
   private EnabledOverride vectorizationEnabledOverride;
-  boolean isTestForcedVectorizationEnable;
+  private boolean isTestForcedVectorizationEnable;

   private boolean useVectorizedInputFileFormat;
   private boolean useVectorDeserialize;
@@ -856,10 +853,7 @@ private void doProcessChildren(
       VectorTaskColumnInfo vectorTaskColumnInfo)
           throws VectorizerCannotVectorizeException {

-    List<Operator<? extends OperatorDesc>> vectorChildren = newOperatorList();
     List<Operator<? extends OperatorDesc>> children = parent.getChildOperators();
-    List<List<Operator<? extends OperatorDesc>>> listOfChildMultipleParents =
-        new ArrayList<List<Operator<? extends OperatorDesc>>>();

     final int childrenCount = children.size();
     for (int i = 0; i < childrenCount; i++) {
@@ -975,7 +969,6 @@ private void fixupOtherParent(
       vContext = ((VectorizationOperator) vectorParent).getInputVectorizationContext();
     }

-    OperatorDesc desc = child.getConf();
     Operator<? extends OperatorDesc> vectorChild;

     try {
@@ -1191,13 +1184,6 @@ private void convertMapWork(MapWork mapWork, boolean isTezOrSpark) throws Semant
     validateAndVectorizeMapWork(mapWork, vectorTaskColumnInfo, isTezOrSpark);
   }

-  private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
-    opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*"
-        + FileSinkOperator.getOperatorName()), np);
-    opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
-        + ReduceSinkOperator.getOperatorName()), np);
-  }
-
   /*
    * Determine if there is only one TableScanOperator.  Currently in Map vectorization, we do not
    * try to vectorize multiple input trees.
@@ -1230,7 +1216,7 @@ private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np)
       setNodeIssue("Vectorized map work only works with 1 TableScanOperator");
       return null;
     }
-    return new ImmutablePair<String, TableScanOperator>(alias, tableScanOperator);
+    return new ImmutablePair<>(alias, tableScanOperator);
   }

   private void getTableScanOperatorSchemaInfo(TableScanOperator tableScanOperator,
@@ -1677,7 +1663,7 @@ private void setValidateInputFormatAndSchemaEvolutionExplain(MapWork mapWork,
     ArrayList<VectorPartitionDesc> vectorPartitionDescList = new ArrayList<VectorPartitionDesc>();
     vectorPartitionDescList.addAll(vectorPartitionDescMap.keySet());
     mapWork.setVectorPartitionDescList(vectorPartitionDescList);
-    mapWork.setVectorizationEnabledConditionsMet(new ArrayList<String>(enabledConditionsMetSet));
+    mapWork.setVectorizationEnabledConditionsMet(new ArrayList<>(enabledConditionsMetSet));
     mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList);
   }

@@ -1933,7 +1919,7 @@ private void setValidateInputFormatAndSchemaEvolutionExplain(MapWork mapWork,
     ArrayList<VectorPartitionDesc> vectorPartitionDescList = new ArrayList<VectorPartitionDesc>();
     vectorPartitionDescList.addAll(vectorPartitionDescMap.keySet());
     mapWork.setVectorPartitionDescList(vectorPartitionDescList);
-    mapWork.setVectorizationEnabledConditionsMet(new ArrayList<String>(enabledConditionsMetSet));
+    mapWork.setVectorizationEnabledConditionsMet(new ArrayList<>(enabledConditionsMetSet));
     mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList);

     return new ImmutablePair<Boolean, Boolean>(true, false);
@@ -2049,8 +2035,6 @@ private void validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo v
     vectorTaskColumnInfo.transferToBaseWork(mapWork);

     mapWork.setVectorMode(true);
-
-    return;
   }

   private boolean validateAndVectorizeMapOperators(MapWork mapWork, TableScanOperator tableScanOperator,
@@ -2225,8 +2209,6 @@ private void convertReduceWork(ReduceWork reduceWork) throws SemanticException {
   private void validateAndVectorizeReduceWork(ReduceWork reduceWork,
       VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException {

-    Operator<?> reducer = reduceWork.getReducer();
-
     // Validate input to ReduceWork.
     if (!getOnlyStructObjectInspectors(reduceWork, vectorTaskColumnInfo)) {
       return;
@@ -2611,18 +2593,6 @@ private boolean validateMapJoinDesc(MapJoinDesc desc) {
     return true;
   }

-  private boolean validateSparkHashTableSinkOperator(SparkHashTableSinkOperator op) {
-    SparkHashTableSinkDesc desc = op.getConf();
-    byte tag = desc.getTag();
-    // it's essentially a MapJoinDesc
-    List<ExprNodeDesc> filterExprs = desc.getFilters().get(tag);
-    List<ExprNodeDesc> keyExprs = desc.getKeys().get(tag);
-    List<ExprNodeDesc> valueExprs = desc.getExprs().get(tag);
-    return validateExprNodeDesc(
-        filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true) &&
-        validateExprNodeDesc(keyExprs, "Key") && validateExprNodeDesc(valueExprs, "Value");
-  }
-
   private boolean validateReduceSinkOperator(ReduceSinkOperator op) {
     List<ExprNodeDesc> keyDescs = op.getConf().getKeyCols();
     List<ExprNodeDesc> partitionDescs = op.getConf().getPartitionCols();
@@ -2900,7 +2870,6 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex
       }
     }

-    boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
     String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
     final int count = evaluatorFunctionNames.length;
     WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
@@ -3277,20 +3246,6 @@ public static String getValidateDataTypeErrorMsg(String type, VectorExpressionDe
     return (result ?
         null : "Vectorizing data type " + type + " not supported");
   }
-
-  private VectorizationContext getVectorizationContext(String contextName,
-      VectorTaskColumnInfo vectorTaskColumnInfo) {
-
-    VectorizationContext vContext =
-        new VectorizationContext(
-            contextName,
-            vectorTaskColumnInfo.allColumnNames,
-            vectorTaskColumnInfo.allTypeInfos,
-            vectorTaskColumnInfo.allDataTypePhysicalVariations,
-            hiveConf);
-
-    return vContext;
-  }
-
   private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
       Operator<? extends OperatorDesc> vectorOp) {
     if (op.getParentOperators() != null) {
@@ -3781,16 +3736,13 @@ private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoi
     int firstSmallTableOutputColumn;
     firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0);
-    int smallTableOutputCount = 0;
     nextOutputColumn = firstSmallTableOutputColumn;

     // Small table indices has more information (i.e. keys) than retain, so use it if it exists...
     if (smallTableIndicesSize > 0) {
-      smallTableOutputCount = smallTableIndicesSize;

       for (int i = 0; i < smallTableIndicesSize; i++) {

         if (smallTableIndices[i] >= 0) {
-
           // Zero and above numbers indicate a big table key is needed for
           // small table result "area".

@@ -3854,8 +3806,6 @@ private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoi
           nextOutputColumn++;
         }
       } else if (smallTableRetainSize > 0) {
-        smallTableOutputCount = smallTableRetainSize;
-
         // Only small table values appear in join output result.

         for (int i = 0; i < smallTableRetainSize; i++) {
@@ -3904,11 +3854,11 @@ private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoi

     vectorDesc.setSupportsKeyTypes(supportsKeyTypes);
     if (!supportsKeyTypes) {
-      vectorDesc.setNotSupportedKeyTypes(new ArrayList<String>(notSupportedKeyTypes));
+      vectorDesc.setNotSupportedKeyTypes(new ArrayList<>(notSupportedKeyTypes));
     }
     vectorDesc.setSupportsValueTypes(supportsValueTypes);
     if (!supportsValueTypes) {
-      vectorDesc.setNotSupportedValueTypes(new ArrayList<String>(notSupportedValueTypes));
+      vectorDesc.setNotSupportedValueTypes(new ArrayList<>(notSupportedValueTypes));
     }

     // Check common conditions for both Optimized and Fast Hash Tables.
@@ -4988,9 +4938,7 @@ private static VectorPTFInfo createVectorPTFInfo(Operator<? extends OperatorDes
-    ArrayList<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
+    List<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
     final int outputSize = outputSignature.size();

     boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
@@ -4999,7 +4947,6 @@ private static VectorPTFInfo createVectorPTFInfo(Operator<? extends OperatorDes
     List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();

     /*
@@ -5073,10 +5020,6 @@ private static VectorPTFInfo createVectorPTFInfo(Operator<? extends OperatorDes
       List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
       VectorExpression inputVectorExpression;
       final Type columnVectorType;
@@ -5089,7 +5032,6 @@ private static VectorPTFInfo createVectorPTFInfo(Operator<? extends OperatorDes