diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index c76026b..4918825 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; import org.apache.hadoop.hive.ql.stats.StatsPublisher; @@ -134,7 +135,9 @@ public void process(Object row, int tag) throws HiveException { return; } } - if (conf != null && conf.isGatherStats()) { + // Test the row itself rather than 'vectorized': a VectorizedRowBatch can reach this + // operator even when the pipeline is not vectorized (FetchOptimizer). + if (!(row instanceof VectorizedRowBatch) && conf != null && conf.isGatherStats()) { gatherStats(row); } forward(row, inputObjInspectors[tag], vectorized); @@ -254,6 +257,9 @@ protected void initializeOp(Configuration hconf) throws HiveException { } rowLimit = conf.getRowLimit(); + if (!conf.isGatherStats()) { + return; + } if (hconf instanceof JobConf) { jc = (JobConf) hconf; diff --git ql/src/test/queries/clientpositive/vector_gather_stats.q ql/src/test/queries/clientpositive/vector_gather_stats.q new file mode 100644 index 0000000..34e1b1d --- /dev/null +++ ql/src/test/queries/clientpositive/vector_gather_stats.q @@ -0,0 +1,22 @@ +set hive.vectorized.execution.enabled=true; + +-- HIVE-18191 + +create table cd +( + cd_demo_sk int, 
cd_gender string, + cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +); +alter table cd add partition (cd_education_status='Primary'); +insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 0, 0); +analyze table cd partition (cd_education_status) compute statistics; \ No newline at end of file diff --git ql/src/test/results/clientpositive/vector_gather_stats.q.out ql/src/test/results/clientpositive/vector_gather_stats.q.out new file mode 100644 index 0000000..8edb5f4 --- /dev/null +++ ql/src/test/results/clientpositive/vector_gather_stats.q.out @@ -0,0 +1,69 @@ +PREHOOK: query: create table cd +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cd +POSTHOOK: query: create table cd +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cd +PREHOOK: query: alter table cd add partition (cd_education_status='Primary') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@cd +POSTHOOK: query: alter table cd add partition (cd_education_status='Primary') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@cd +POSTHOOK: Output: default@cd@cd_education_status=Primary +PREHOOK: query: insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 
0, 0) +PREHOOK: type: QUERY +PREHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: query: insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 0, 0) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_credit_rating SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_demo_sk EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_college_count EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_count EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_employed_count EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_gender SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_marital_status SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_purchase_estimate EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: analyze table cd partition (cd_education_status) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@cd +PREHOOK: Input: 
default@cd@cd_education_status=Primary +PREHOOK: Output: default@cd +PREHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: query: analyze table cd partition (cd_education_status) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cd +POSTHOOK: Input: default@cd@cd_education_status=Primary +POSTHOOK: Output: default@cd +POSTHOOK: Output: default@cd@cd_education_status=Primary