diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index c76026b..c0138f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -115,11 +115,7 @@ public String getSchemaEvolutionColumnsTypes() { @Override public void process(Object row, int tag) throws HiveException { if (rowLimit >= 0) { - if (row instanceof VectorizedRowBatch) { - // We need to check with 'instanceof' instead of just checking - // vectorized because the row can be a VectorizedRowBatch when - // FetchOptimizer kicks in even if the operator pipeline is not - // vectorized + if (vectorized) { VectorizedRowBatch batch = (VectorizedRowBatch) row; if (currCount >= rowLimit) { setDone(true); @@ -266,6 +262,10 @@ protected void initializeOp(Configuration hconf) throws HiveException { currentStat = null; stats = new HashMap(); + /* + * This TableScanDesc flag is strictly set by the Vectorizer class for vectorized MapWork + * vertices. + */ vectorized = conf.isVectorized(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 6500682..8ce2c33 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1513,7 +1513,7 @@ private boolean validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInf LOG.info("Examining input format to see if vectorization is enabled."); ImmutablePair onlyOneTableScanPair = verifyOnlyOneTableScanOperator(mapWork); - if (onlyOneTableScanPair == null) { + if (onlyOneTableScanPair == null) { VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); Preconditions.checkState(notVectorizedReason != null); mapWork.setVectorizationEnabledConditionsNotMet(Arrays.asList(new String[] {notVectorizedReason.toString()})); @@ -1638,6 +1638,11 @@ private boolean validateAndVectorizeMapOperators(MapWork mapWork, TableScanOpera // Set "global" member indicating where to store "not vectorized" information if necessary. currentBaseWork = mapWork; + if (!validateTableScanOperator(tableScanOperator, mapWork)) { + + // The "not vectorized" information has been stored in the MapWork vertex. + return false; + } try { validateAndVectorizeMapOperators(tableScanOperator, isTezOrSpark, vectorTaskColumnInfo); } catch (VectorizerCannotVectorizeException e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index 4b7d2b4..8d966c7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -495,6 +495,10 @@ public TableScanOperatorExplainVectorization getTableScanVectorization() { return new TableScanOperatorExplainVectorization(this, vectorTableScanDesc); } + /* + * This TableScanDesc flag is strictly set by the Vectorizer class for vectorized MapWork + * vertices. + */ public void setVectorized(boolean vectorized) { this.vectorized = vectorized; } diff --git ql/src/test/queries/clientpositive/vector_gather_stats.q ql/src/test/queries/clientpositive/vector_gather_stats.q new file mode 100644 index 0000000..deaca0e --- /dev/null +++ ql/src/test/queries/clientpositive/vector_gather_stats.q @@ -0,0 +1,26 @@ +set hive.vectorized.execution.enabled=true; + +-- HIVE-18191 + +create table cd +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +); +alter table cd add partition (cd_education_status='Primary'); +insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 0, 0); + +explain vectorization detail +analyze table cd partition (cd_education_status) compute statistics; + +analyze table cd partition (cd_education_status) compute statistics; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/explainuser_3.q.out ql/src/test/results/clientpositive/tez/explainuser_3.q.out index fa73dc6..37494a1 100644 --- ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -191,7 +191,7 @@ POSTHOOK: type: QUERY Stage-2 Stats Work{} Stage-0 - Map 1 vectorized + Map 1 TableScan [TS_0] (rows=500 width=10) default@src,src,Tbl:COMPLETE,Col:COMPLETE diff --git ql/src/test/results/clientpositive/vector_gather_stats.q.out ql/src/test/results/clientpositive/vector_gather_stats.q.out new file mode 100644 index 0000000..cf9bc85 --- /dev/null +++ ql/src/test/results/clientpositive/vector_gather_stats.q.out @@ -0,0 +1,101 @@ +PREHOOK: query: create table cd +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cd +POSTHOOK: query: create table cd +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cd +PREHOOK: query: alter table cd add partition (cd_education_status='Primary') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@cd +POSTHOOK: query: alter table cd add partition (cd_education_status='Primary') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@cd +POSTHOOK: Output: default@cd@cd_education_status=Primary +PREHOOK: query: insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 0, 0) +PREHOOK: type: QUERY +PREHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: query: insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 0, 0) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_credit_rating SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_demo_sk EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_college_count EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_count EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_employed_count EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_gender SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_marital_status SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_purchase_estimate EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: explain vectorization detail +analyze table cd partition (cd_education_status) compute statistics +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +analyze table cd partition (cd_education_status) compute statistics +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: cd + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: TABLESCAN operator: gather stats not supported + vectorized: false + + Stage: Stage-1 + Stats Work + Basic Stats Work: + +PREHOOK: query: analyze table cd partition (cd_education_status) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@cd +PREHOOK: Input: default@cd@cd_education_status=Primary +PREHOOK: Output: default@cd +PREHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: query: analyze table cd partition (cd_education_status) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cd +POSTHOOK: Input: default@cd@cd_education_status=Primary +POSTHOOK: Output: default@cd +POSTHOOK: Output: default@cd@cd_education_status=Primary