diff --git contrib/src/test/queries/clientpositive/vector_udf_example_arraymapstruct.q contrib/src/test/queries/clientpositive/vector_udf_example_arraymapstruct.q new file mode 100644 index 0000000..273663a --- /dev/null +++ contrib/src/test/queries/clientpositive/vector_udf_example_arraymapstruct.q @@ -0,0 +1,21 @@ +--! qt:dataset:src_thrift + +set hive.vectorized.execution.enabled=true; +set hive.test.vectorized.execution.enabled.override=enable; + +add jar ${system:maven.local.repository}/org/apache/hive/hive-contrib/${system:hive.version}/hive-contrib-${system:hive.version}.jar; + +CREATE TEMPORARY FUNCTION example_arraysum AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleArraySum'; +CREATE TEMPORARY FUNCTION example_mapconcat AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleMapConcat'; +CREATE TEMPORARY FUNCTION example_structprint AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleStructPrint'; + +EXPLAIN +SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0]) +FROM src_thrift; + +SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0]) +FROM src_thrift; + +DROP TEMPORARY FUNCTION example_arraysum; +DROP TEMPORARY FUNCTION example_mapconcat; +DROP TEMPORARY FUNCTION example_structprint; diff --git contrib/src/test/results/clientpositive/vector_udf_example_arraymapstruct.q.out contrib/src/test/results/clientpositive/vector_udf_example_arraymapstruct.q.out new file mode 100644 index 0000000..75bbdff --- /dev/null +++ contrib/src/test/results/clientpositive/vector_udf_example_arraymapstruct.q.out @@ -0,0 +1,95 @@ +PREHOOK: query: CREATE TEMPORARY FUNCTION example_arraysum AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleArraySum' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: example_arraysum +POSTHOOK: query: CREATE TEMPORARY FUNCTION example_arraysum AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleArraySum' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: example_arraysum +PREHOOK: query: CREATE TEMPORARY FUNCTION example_mapconcat AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleMapConcat' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: example_mapconcat +POSTHOOK: query: CREATE TEMPORARY FUNCTION example_mapconcat AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleMapConcat' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: example_mapconcat +PREHOOK: query: CREATE TEMPORARY FUNCTION example_structprint AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleStructPrint' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: example_structprint +POSTHOOK: query: CREATE TEMPORARY FUNCTION example_structprint AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleStructPrint' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: example_structprint +PREHOOK: query: EXPLAIN +SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0]) +FROM src_thrift +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0]) +FROM src_thrift +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_thrift + Statistics: Num rows: 11 Data size: 30700 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: example_arraysum(lint) (type: double), example_mapconcat(mstringstring) (type: string), example_structprint(lintstring[0]) (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 30700 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 30700 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0]) +FROM src_thrift +PREHOOK: type: QUERY +PREHOOK: Input: default@src_thrift +#### A masked pattern was here #### +POSTHOOK: query: SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0]) +FROM src_thrift +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_thrift +#### A masked pattern was here #### +0.0 (key_0:value_0) (0:0)(1:0)(2:0) +6.0 (key_1:value_1) (0:1)(1:1)(2:1) +12.0 (key_2:value_2) (0:4)(1:8)(2:2) +18.0 (key_3:value_3) (0:9)(1:27)(2:3) +24.0 (key_4:value_4) (0:16)(1:64)(2:4) +30.0 (key_5:value_5) (0:25)(1:125)(2:5) +36.0 (key_6:value_6) (0:36)(1:216)(2:6) +42.0 (key_7:value_7) (0:49)(1:343)(2:7) +48.0 (key_8:value_8) (0:64)(1:512)(2:8) +54.0 (key_9:value_9) (0:81)(1:729)(2:9) +NULL NULL NULL +PREHOOK: query: DROP TEMPORARY FUNCTION example_arraysum +PREHOOK: type: DROPFUNCTION +PREHOOK: Output: example_arraysum +POSTHOOK: query: DROP TEMPORARY FUNCTION example_arraysum +POSTHOOK: type: DROPFUNCTION +POSTHOOK: Output: example_arraysum +PREHOOK: query: DROP TEMPORARY FUNCTION example_mapconcat +PREHOOK: type: DROPFUNCTION +PREHOOK: Output: example_mapconcat +POSTHOOK: query: DROP TEMPORARY FUNCTION example_mapconcat +POSTHOOK: type: DROPFUNCTION +POSTHOOK: Output: example_mapconcat +PREHOOK: query: DROP TEMPORARY FUNCTION example_structprint +PREHOOK: type: DROPFUNCTION +PREHOOK: Output: example_structprint +POSTHOOK: query: DROP TEMPORARY FUNCTION example_structprint +POSTHOOK: type: DROPFUNCTION +POSTHOOK: Output: example_structprint diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index 55dc461..20d518b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.SettableUnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector; @@ -1590,20 +1591,7 @@ public Object initValue(Object ignored) { @Override public Object writeValue(ColumnVector column, int row) throws HiveException { - final StructColumnVector structColVector = (StructColumnVector) column; - final SettableStructObjectInspector structOI = - (SettableStructObjectInspector) this.objectInspector; - final List fields = structOI.getAllStructFieldRefs(); - final List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - - final int fieldSize = fields.size(); - for (int i = 0; i < fieldSize; i++) { - final StructField structField = fields.get(i); - final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i], - fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row); - structOI.setStructFieldData(obj, structField, value); - } - return this.obj; + return setValueInternal(obj, column, row); } @Override @@ -1611,19 +1599,39 @@ public Object setValue(Object struct, ColumnVector column, int row) throws HiveE if (struct == null) { struct = initValue(null); } + + return setValueInternal(struct, column, row); + } - final StructColumnVector structColVector = (StructColumnVector) column; - final SettableStructObjectInspector structOI = - (SettableStructObjectInspector) this.objectInspector; - final List fields = structOI.getAllStructFieldRefs(); - final List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + private Object setValueInternal(final Object struct, ColumnVector colVector, int batchIndex) { + if (colVector == null) { + // The planner will not include unneeded columns for reading but other parts of execution + // may ask for them.. + return null; + } + + final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (!colVector.noNulls && colVector.isNull[adjustedIndex]) { + return null; + } - final int fieldSize = fields.size(); - for (int i = 0; i < fieldSize; i++) { - final StructField structField = fields.get(i); - final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i], - fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row); - structOI.setStructFieldData(struct, structField, value); + final StructColumnVector structColumnVector = (StructColumnVector) colVector; + final StandardStructObjectInspector structInspector = + (StandardStructObjectInspector) objectInspector; + final List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + final int size = fieldTypeInfos.size(); + final List structFields = + structInspector.getAllStructFieldRefs(); + + for (int i = 0; i < size; i++) { + final StructField structField = structFields.get(i); + final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i); + final Object value = vectorExtractRow.extractRowColumn( + structColumnVector.fields[i], + fieldTypeInfo, + structField.getFieldObjectInspector(), + adjustedIndex); + structInspector.setStructFieldData(struct, structField, value); } return struct; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java index fe3c91c..4477579 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java @@ -21,16 +21,21 @@ import java.sql.Timestamp; import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.Random; import junit.framework.Assert; +import junit.framework.TestCase; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -42,8 +47,11 @@ import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -379,6 +387,155 @@ private void testSetterText(TypeInfo type) throws HiveException { } } + @SuppressWarnings("unchecked") + private void testListLong() throws HiveException { + LongColumnVector icv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false, + 5, new Random(10)); + ListColumnVector cv = new ListColumnVector(3, icv); + cv.init(); + + // set the offset and length for the two elements + cv.offsets[0] = 0; + cv.lengths[0] = 2; + cv.offsets[1] = 2; + cv.lengths[1] = 3; + + // initialize the integer values + for (int i = 0; i < 5; i++) { + icv.vector[i] = i; + icv.isNull[i] = false; + } + icv.noNulls = true; + + cv.isNull[0] = false; + cv.isNull[1] = false; + cv.isNull[2] = true; + cv.noNulls = false; + + SettableListObjectInspector listOI = + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector); + + VectorExpressionWriter vew = VectorExpressionWriterFactory.genVectorExpressionWritable(listOI); + + List values1 = (List) listOI.create(2); + vew.setValue(values1, cv, 0); + + List values2 = (List) listOI.create(3); + vew.setValue(values2, cv, 1); + + TestCase.assertEquals(values1.size(), 2); + TestCase.assertEquals(values2.size(), 3); + + for (int i = 0; i < values1.size(); i++) { + IntWritable w = (IntWritable) values1.get(i); + TestCase.assertEquals(i, w.get()); + } + + for (int i = 0; i < values2.size(); i++) { + IntWritable w = (IntWritable) values2.get(i); + TestCase.assertEquals(2 + i, w.get()); + } + + List values3 = (List) vew.writeValue(cv, 0); + TestCase.assertEquals(2, values3.size()); + for (int i = 0; i < values1.size(); i++) { + IntWritable w = (IntWritable) values3.get(i); + TestCase.assertEquals(i, w.get()); + } + + List values4 = (List) vew.writeValue(cv, 1); + TestCase.assertEquals(3, values4.size()); + for (int i = 0; i < values2.size(); i++) { + IntWritable w = (IntWritable) values4.get(i); + TestCase.assertEquals(2 + i, w.get()); + } + + List values5 = (List) vew.writeValue(cv, 2); + TestCase.assertNull(values5); + } + + @SuppressWarnings("unchecked") + private void testMapLong() throws HiveException { + LongColumnVector kcv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false, + 5, new Random(10)); + LongColumnVector vcv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false, + 5, new Random(10)); + MapColumnVector cv = new MapColumnVector(3, kcv, vcv); + cv.init(); + + // set the offset and length for the two elements + cv.offsets[0] = 0; + cv.lengths[0] = 2; + cv.offsets[1] = 2; + cv.lengths[1] = 3; + + // initialize the keys and values + for (int i = 0; i < 5; i++) { + kcv.vector[i] = i; + kcv.isNull[i] = false; + } + kcv.noNulls = true; + + for (int i = 0; i < 5; i++) { + vcv.vector[i] = 5 + i; + vcv.isNull[i] = false; + } + vcv.noNulls = true; + + cv.isNull[0] = false; + cv.isNull[1] = false; + cv.isNull[2] = true; + cv.noNulls = false; + + SettableMapObjectInspector mapOI = + ObjectInspectorFactory.getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector, + PrimitiveObjectInspectorFactory.writableIntObjectInspector); + + VectorExpressionWriter vew = VectorExpressionWriterFactory.genVectorExpressionWritable(mapOI); + + Map values1 = (Map) mapOI.create(); + vew.setValue(values1, cv, 0); + + Map values2 = (Map) mapOI.create(); + vew.setValue(values2, cv, 1); + + TestCase.assertEquals(2, values1.size()); + TestCase.assertEquals(3, values2.size()); + + for (int i = 0; i < values1.size(); i++) { + IntWritable key = new IntWritable(i); + IntWritable w = (IntWritable) values1.get(key); + TestCase.assertEquals(5 + i, w.get()); + } + + for (int i = 0; i < values2.size(); i++) { + IntWritable key = new IntWritable(2 + i); + IntWritable w = (IntWritable) values2.get(key); + TestCase.assertEquals(5 + 2 + i, w.get()); + } + + Map values3 = (Map) vew.writeValue(cv, 0); + TestCase.assertEquals(2, values3.size()); + for (int i = 0; i < values1.size(); i++) { + IntWritable key = new IntWritable(i); + IntWritable w = (IntWritable) values1.get(key); + TestCase.assertEquals(5 + i, w.get()); + } + + Map values4 = (Map) vew.writeValue(cv, 1); + TestCase.assertEquals(3, values4.size()); + for (int i = 0; i < values2.size(); i++) { + IntWritable key = new IntWritable(2 + i); + IntWritable w = (IntWritable) values2.get(key); + TestCase.assertEquals(5 + 2 + i, w.get()); + } + + Map values5 = (Map) vew.writeValue(cv, 2); + TestCase.assertNull(values5); + } + @Test public void testVectorExpressionWriterDouble() throws HiveException { testWriterDouble(TypeInfoFactory.doubleTypeInfo); @@ -506,4 +663,14 @@ public void testVectorExpressionWriterBinary() throws HiveException { public void testVectorExpressionSetterBinary() throws HiveException { testSetterText(TypeInfoFactory.binaryTypeInfo); } + + @Test + public void testVectorExpressionListLong() throws HiveException { + testListLong(); + } + + @Test + public void testVectorExpressionMapLong() throws HiveException { + testMapLong(); + } }