diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index d0eb2a4..291c90b 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3512,7 +3512,7 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "The default value is false."),
     HIVE_VECTORIZATION_ROW_DESERIALIZE_INPUTFORMAT_EXCLUDES(
         "hive.vectorized.row.serde.inputformat.excludes",
-        "org.apache.parquet.hadoop.ParquetInputFormat,org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
+        "org.apache.parquet.hadoop.ParquetInputFormat,org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat,org.apache.hive.storage.jdbc.JdbcInputFormat",
         "The input formats not supported by row deserialize vectorization."),
     HIVE_VECTOR_ADAPTOR_USAGE_MODE("hive.vectorized.adaptor.usage.mode", "all", new StringSet("none", "chosen", "all"),
         "Specifies the extent to which the VectorUDFAdaptor will be used for UDFs that do not have a corresponding vectorized class.\n" +
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index 6f1346d..2542e03 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -945,8 +945,15 @@ public void process(Writable value) throws HiveException {
 
             // Convert input row to standard objects.
             List<Object> standardObjects = new ArrayList<Object>();
-            ObjectInspectorUtils.copyToStandardObject(standardObjects, deserialized,
-                currentPartRawRowObjectInspector, ObjectInspectorCopyOption.WRITABLE);
+            try {
+              ObjectInspectorUtils.copyToStandardObject(
+                  standardObjects,
+                  deserialized,
+                  currentPartRawRowObjectInspector,
+                  ObjectInspectorCopyOption.WRITABLE);
+            } catch (Exception e) {
+              throw new HiveException("copyToStandardObject failed: " + e);
+            }
             if (standardObjects.size() < currentDataColumnCount) {
               throw new HiveException("Input File Format returned row with too few columns");
             }
diff --git ql/src/test/queries/clientpositive/sysdb.q ql/src/test/queries/clientpositive/sysdb.q
index 1dfcbce..459b11f 100644
--- ql/src/test/queries/clientpositive/sysdb.q
+++ ql/src/test/queries/clientpositive/sysdb.q
@@ -60,6 +60,9 @@ select role_name from roles order by role_name limit 5;
 
 select principal_name, grantor from role_map order by principal_name, grantor limit 5;
 
+explain vectorization detail
+select count(*) from sds;
+
 select count(*) from sds;
 
 select param_key, param_value from sd_params order by param_key, param_value limit 5;
diff --git ql/src/test/results/clientpositive/llap/sysdb.q.out ql/src/test/results/clientpositive/llap/sysdb.q.out
index de20f2d..9eed19d 100644
--- ql/src/test/results/clientpositive/llap/sysdb.q.out
+++ ql/src/test/results/clientpositive/llap/sysdb.q.out
@@ -3256,6 +3256,95 @@ POSTHOOK: query: select principal_name, grantor from role_map order by principal
 POSTHOOK: type: QUERY
 POSTHOOK: Input: sys@role_map
 #### A masked pattern was here ####
+PREHOOK: query: explain vectorization detail
+select count(*) from sds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select count(*) from sds
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: sds
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count()
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS true AND hive.vectorized.row.serde.inputformat.excludes NOT CONTAINS org.apache.hive.storage.jdbc.JdbcInputFormat IS false, hive.vectorized.use.row.serde.deserialize IS false
+                inputFileFormats: org.apache.hive.storage.jdbc.JdbcInputFormat
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: select count(*) from sds
 PREHOOK: type: QUERY
 PREHOOK: Input: sys@sds
 #### A masked pattern was here ####
@@ -3556,7 +3645,7 @@ STAGE PLANS:
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0)
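
Not part of the patch itself: a small illustrative sketch of what the HiveConf change above means in practice. With org.apache.hive.storage.jdbc.JdbcInputFormat added to hive.vectorized.row.serde.inputformat.excludes, the planner refuses row-serde vectorization for the JDBC-backed sys tables, which is exactly what the enabledConditionsNotMet line in the new sysdb.q.out plan reports (only the reducer stays vectorized). HiveConf, ConfVars.HIVE_VECTORIZATION_ROW_DESERIALIZE_INPUTFORMAT_EXCLUDES, and getVar are existing APIs touched by the patch; the RowSerdeExcludeCheck class and its isRowSerdeExcluded helper are hypothetical, written only to show how the comma-separated exclude list can be checked against an input format class name.

    import java.util.Arrays;

    import org.apache.hadoop.hive.conf.HiveConf;

    // Hypothetical helper, illustration only -- not part of this patch.
    public class RowSerdeExcludeCheck {

      // True when the given input format class name appears in the
      // comma-separated hive.vectorized.row.serde.inputformat.excludes list.
      static boolean isRowSerdeExcluded(HiveConf conf, String inputFormatClassName) {
        String excludes = conf.getVar(
            HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_DESERIALIZE_INPUTFORMAT_EXCLUDES);
        return Arrays.asList(excludes.split(",")).contains(inputFormatClassName);
      }

      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // With the new default introduced by this patch, this is expected to print true.
        System.out.println(
            isRowSerdeExcluded(conf, "org.apache.hive.storage.jdbc.JdbcInputFormat"));
      }
    }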