diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index f15de97..d7d28f4 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -226,6 +226,7 @@ minitez.query.files.shared=acid_globallimit.q,\ vector_char_simple.q,\ vector_coalesce.q,\ vector_coalesce_2.q,\ + vector_complex.q,\ vector_count_distinct.q,\ vector_data_types.q,\ vector_date_1.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java new file mode 100644 index 0000000..66240dd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * The representation of a vectorized column of list objects. + * + * Each list is composed of a range of elements in the underlying child + * ColumnVector. The range for list i is + * offsets[i]..offsets[i]+lengths[i]-1 inclusive. 
+ */ +public class ListColumnVector extends MultiValuedColumnVector { + + public ColumnVector child; + + public ListColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE, null); + } + + /** + * Constructor for ListColumnVector. + * + * @param len Vector length + * @param child The child vector + */ + public ListColumnVector(int len, ColumnVector child) { + super(len); + this.child = child; + } + + @Override + protected void childFlatten(boolean useSelected, int[] selected, int size) { + child.flatten(useSelected, selected, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + ListColumnVector input = (ListColumnVector) inputVector; + if (input.isRepeating) { + inputElementNum = 0; + } + if (!input.noNulls && input.isNull[inputElementNum]) { + isNull[outElementNum] = true; + noNulls = false; + } else { + isNull[outElementNum] = false; + int offset = childCount; + int length = (int) input.lengths[inputElementNum]; + int inputOffset = (int) input.offsets[inputElementNum]; + offsets[outElementNum] = offset; + childCount += length; + lengths[outElementNum] = length; + child.ensureSize(childCount, true); + for (int i = 0; i < length; ++i) { + child.setElement(i + offset, inputOffset + i, input.child); + } + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('['); + boolean isFirst = true; + for(long i=offsets[row]; i < offsets[row] + lengths[row]; ++i) { + if (isFirst) { + isFirst = false; + } else { + buffer.append(", "); + } + child.stringifyValue(buffer, (int) i); + } + buffer.append(']'); + } else { + buffer.append("null"); + } + } + + @Override + public void init() { + super.init(); + child.init(); + } + + @Override + public void reset() { + super.reset(); + child.reset(); + } + + @Override + public void unFlatten() { + super.unFlatten(); + if (!isRepeating || noNulls || !isNull[0]) 
{ + child.unFlatten(); + } + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java new file mode 100644 index 0000000..e8421e3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * The representation of a vectorized column of map objects. + * + * Each map is composed of a range of elements in the underlying child + * ColumnVector. The range for map i is + * offsets[i]..offsets[i]+lengths[i]-1 inclusive. 
+ */ +public class MapColumnVector extends MultiValuedColumnVector { + + public ColumnVector keys; + public ColumnVector values; + + public MapColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE, null, null); + } + + /** + * Constructor for MapColumnVector + * + * @param len Vector length + * @param keys The keys column vector + * @param values The values column vector + */ + public MapColumnVector(int len, ColumnVector keys, ColumnVector values) { + super(len); + this.keys = keys; + this.values = values; + } + + @Override + protected void childFlatten(boolean useSelected, int[] selected, int size) { + keys.flatten(useSelected, selected, size); + values.flatten(useSelected, selected, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (!inputVector.noNulls && inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = true; + noNulls = false; + } else { + MapColumnVector input = (MapColumnVector) inputVector; + isNull[outElementNum] = false; + int offset = childCount; + int length = (int) input.lengths[inputElementNum]; + int inputOffset = (int) input.offsets[inputElementNum]; + offsets[outElementNum] = offset; + childCount += length; + lengths[outElementNum] = length; + keys.ensureSize(childCount, true); + values.ensureSize(childCount, true); + for (int i = 0; i < length; ++i) { + keys.setElement(i + offset, inputOffset + i, input.keys); + values.setElement(i + offset, inputOffset + i, input.values); + } + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('['); + boolean isFirst = true; + for(long i=offsets[row]; i < offsets[row] + lengths[row]; ++i) { + if (isFirst) { + isFirst = false; + } else { + buffer.append(", "); + } + buffer.append("{\"key\": "); + keys.stringifyValue(buffer, (int) i); + 
buffer.append(", \"value\": "); + values.stringifyValue(buffer, (int) i); + buffer.append('}'); + } + buffer.append(']'); + } else { + buffer.append("null"); + } + } + + @Override + public void init() { + super.init(); + keys.init(); + values.init(); + } + + @Override + public void reset() { + super.reset(); + keys.reset(); + values.reset(); + } + + @Override + public void unFlatten() { + super.unFlatten(); + if (!isRepeating || noNulls || !isNull[0]) { + keys.unFlatten(); + values.unFlatten(); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java new file mode 100644 index 0000000..d8451f0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Arrays; + +/** + * The representation of a vectorized column of multi-valued objects, such + * as lists and maps. + * + * Each object is composed of a range of elements in the underlying child + * ColumnVector. 
The range for object i is + * offsets[i]..offsets[i]+lengths[i]-1 inclusive. + */ +public abstract class MultiValuedColumnVector extends ColumnVector { + + public long[] offsets; + public long[] lengths; + // the number of children slots used + public int childCount; + + /** + * Constructor for MultiValuedColumnVector. + * + * @param len Vector length + */ + public MultiValuedColumnVector(int len) { + super(len); + childCount = 0; + offsets = new long[len]; + lengths = new long[len]; + } + + protected abstract void childFlatten(boolean useSelected, int[] selected, + int size); + + @Override + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + + if (isRepeating) { + if (noNulls || !isNull[0]) { + if (selectedInUse) { + for (int i = 0; i < size; ++i) { + int row = sel[i]; + offsets[row] = offsets[0]; + lengths[row] = lengths[0]; + isNull[row] = false; + } + } else { + Arrays.fill(offsets, 0, size, offsets[0]); + Arrays.fill(lengths, 0, size, lengths[0]); + Arrays.fill(isNull, 0, size, false); + } + // We optimize by assuming that a repeating list/map will run + // from 0 .. lengths[0] in the child vector. + // Sanity check the assumption that we can start at 0. 
+ if (offsets[0] != 0) { + throw new IllegalArgumentException("Repeating offset isn't 0, but " + + offsets[0]); + } + childFlatten(false, null, (int) lengths[0]); + } else { + if (selectedInUse) { + for(int i=0; i < size; ++i) { + isNull[sel[i]] = true; + } + } else { + Arrays.fill(isNull, 0, size, true); + } + } + isRepeating = false; + noNulls = false; + } else { + if (selectedInUse) { + int childSize = 0; + for(int i=0; i < size; ++i) { + childSize += lengths[sel[i]]; + } + int[] childSelection = new int[childSize]; + int idx = 0; + for(int i=0; i < size; ++i) { + int row = sel[i]; + for(int elem=0; elem < lengths[row]; ++elem) { + childSelection[idx++] = (int) (offsets[row] + elem); + } + } + childFlatten(true, childSelection, childSize); + } else { + childFlatten(false, null, childCount); + } + flattenNoNulls(selectedInUse, sel, size); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + if (size > offsets.length) { + super.ensureSize(size, preserveData); + long[] oldOffsets = offsets; + offsets = new long[size]; + long oldLengths[] = lengths; + lengths = new long[size]; + if (preserveData) { + if (isRepeating) { + offsets[0] = oldOffsets[0]; + lengths[0] = oldLengths[0]; + } else { + System.arraycopy(oldOffsets, 0, offsets, 0 , oldOffsets.length); + System.arraycopy(oldLengths, 0, lengths, 0, oldLengths.length); + } + } + } + } + + /** + * Initialize the vector. + */ + @Override + public void init() { + super.init(); + childCount = 0; + } + + /** + * Reset the vector for the next batch. 
+ */ + @Override + public void reset() { + super.reset(); + childCount = 0; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java new file mode 100644 index 0000000..cf07bca --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * The representation of a vectorized column of struct objects. + * + * Each field is represented by a separate inner ColumnVector. Since this + * ColumnVector doesn't own any per row data other than the isNull flag, the + * isRepeating only covers the isNull array. + */ +public class StructColumnVector extends ColumnVector { + + public ColumnVector[] fields; + + public StructColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Constructor for StructColumnVector + * + * @param len Vector length + * @param fields the field column vectors + */ + public StructColumnVector(int len, ColumnVector... 
fields) { + super(len); + this.fields = fields; + } + + @Override + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + for(int i=0; i < fields.length; ++i) { + fields[i].flatten(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + ColumnVector[] inputFields = ((StructColumnVector) inputVector).fields; + for (int i = 0; i < inputFields.length; ++i) { + fields[i].setElement(outElementNum, inputElementNum, inputFields[i]); + } + } else { + noNulls = false; + isNull[outElementNum] = true; + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('['); + for(int i=0; i < fields.length; ++i) { + if (i != 0) { + buffer.append(", "); + } + fields[i].stringifyValue(buffer, row); + } + buffer.append(']'); + } else { + buffer.append("null"); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + super.ensureSize(size, preserveData); + for(int i=0; i < fields.length; ++i) { + fields[i].ensureSize(size, preserveData); + } + } + + @Override + public void reset() { + super.reset(); + for(int i =0; i < fields.length; ++i) { + fields[i].reset(); + } + } + + @Override + public void init() { + super.init(); + for(int i =0; i < fields.length; ++i) { + fields[i].init(); + } + } + + @Override + public void unFlatten() { + super.unFlatten(); + for(int i=0; i < fields.length; ++i) { + fields[i].unFlatten(); + } + } + + @Override + public void setRepeating(boolean isRepeating) { + super.setRepeating(isRepeating); + for(int i=0; i < fields.length; ++i) { + fields[i].setRepeating(isRepeating); + } + } +} diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java new file mode 100644 index 0000000..298d588 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Arrays; + +/** + * The representation of a vectorized column of union objects. + * + * Each alternative is represented by a separate inner ColumnVector, and + * tags[i] selects which of those fields holds the value for row i. The only + * per row data owned by this ColumnVector is the tags and the isNull flag. + */ +public class UnionColumnVector extends ColumnVector { + + public int[] tags; + public ColumnVector[] fields; + + public UnionColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Constructor for UnionColumnVector + * + * @param len Vector length + * @param fields the field column vectors + */ + public UnionColumnVector(int len, ColumnVector... 
fields) { + super(len); + tags = new int[len]; + this.fields = fields; + } + + @Override + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + for(int i=0; i < fields.length; ++i) { + fields[i].flatten(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + UnionColumnVector input = (UnionColumnVector) inputVector; + tags[outElementNum] = input.tags[inputElementNum]; + fields[tags[outElementNum]].setElement(outElementNum, inputElementNum, + input.fields[tags[outElementNum]]); + } else { + noNulls = false; + isNull[outElementNum] = true; + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append("{\"tag\": "); + buffer.append(tags[row]); + buffer.append(", \"value\": "); + fields[tags[row]].stringifyValue(buffer, row); + buffer.append('}'); + } else { + buffer.append("null"); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + super.ensureSize(size, preserveData); + if (tags.length < size) { + if (preserveData) { + int[] oldTags = tags; + tags = new int[size]; + System.arraycopy(oldTags, 0, tags, 0, oldTags.length); + } else { + tags = new int[size]; + } + for(int i=0; i < fields.length; ++i) { + fields[i].ensureSize(size, preserveData); + } + } + } + + @Override + public void reset() { + super.reset(); + for(int i =0; i < fields.length; ++i) { + fields[i].reset(); + } + } + + @Override + public void init() { + super.init(); + for(int i =0; i < fields.length; ++i) { + fields[i].init(); + } + } + + @Override + public void unFlatten() { + super.unFlatten(); + for(int i=0; i < fields.length; ++i) { + 
fields[i].unFlatten(); + } + } + + @Override + public void setRepeating(boolean isRepeating) { + super.setRepeating(isRepeating); + for(int i=0; i < fields.length; ++i) { + fields[i].setRepeating(isRepeating); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index b7e13dd..d258e2d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -160,6 +160,41 @@ public static ColumnVector createColumnVector(TypeInfo typeInfo) { + primitiveTypeInfo.getPrimitiveCategory()); } } + case STRUCT: + { + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List<TypeInfo> typeInfoList = structTypeInfo.getAllStructFieldTypeInfos(); + ColumnVector[] children = new ColumnVector[typeInfoList.size()]; + for(int i=0; i < children.length; ++i) { + children[i] = + createColumnVector(typeInfoList.get(i)); + } + return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + children); + } + case UNION: + { + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + List<TypeInfo> typeInfoList = unionTypeInfo.getAllUnionObjectTypeInfos(); + ColumnVector[] children = new ColumnVector[typeInfoList.size()]; + for(int i=0; i < children.length; ++i) { + children[i] = createColumnVector(typeInfoList.get(i)); + } + return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE, children); + } + case LIST: + { + ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + return new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + createColumnVector(listTypeInfo.getListElementTypeInfo())); + } + case MAP: + { + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + return new MapColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + createColumnVector(mapTypeInfo.getMapKeyTypeInfo()), + createColumnVector(mapTypeInfo.getMapValueTypeInfo())); + } default: throw new RuntimeException("Vectorization 
is not supported for datatype:" + typeInfo.getCategory()); @@ -532,6 +567,48 @@ public static StandardStructObjectInspector convertToStandardStructObjectInspect return typeInfoList.toArray(new TypeInfo[0]); } + static ColumnVector cloneColumnVector(ColumnVector source + ) throws HiveException{ + if (source instanceof LongColumnVector) { + return new LongColumnVector(((LongColumnVector) source).vector.length); + } else if (source instanceof DoubleColumnVector) { + return new DoubleColumnVector(((DoubleColumnVector) source).vector.length); + } else if (source instanceof BytesColumnVector) { + return new BytesColumnVector(((BytesColumnVector) source).vector.length); + } else if (source instanceof DecimalColumnVector) { + DecimalColumnVector decColVector = (DecimalColumnVector) source; + return new DecimalColumnVector(decColVector.vector.length, + decColVector.precision, + decColVector.scale); + } else if (source instanceof ListColumnVector) { + ListColumnVector src = (ListColumnVector) source; + ColumnVector child = cloneColumnVector(src.child); + return new ListColumnVector(src.offsets.length, child); + } else if (source instanceof MapColumnVector) { + MapColumnVector src = (MapColumnVector) source; + ColumnVector keys = cloneColumnVector(src.keys); + ColumnVector values = cloneColumnVector(src.values); + return new MapColumnVector(src.offsets.length, keys, values); + } else if (source instanceof StructColumnVector) { + StructColumnVector src = (StructColumnVector) source; + ColumnVector[] copy = new ColumnVector[src.fields.length]; + for(int i=0; i < copy.length; ++i) { + copy[i] = cloneColumnVector(src.fields[i]); + } + return new StructColumnVector(src.isNull.length, copy); + } else if (source instanceof UnionColumnVector) { + UnionColumnVector src = (UnionColumnVector) source; + ColumnVector[] copy = new ColumnVector[src.fields.length]; + for(int i=0; i < copy.length; ++i) { + copy[i] = cloneColumnVector(src.fields[i]); + } + return new 
UnionColumnVector(src.tags.length, copy); + } else + throw new HiveException("Column vector class " + + source.getClass().getName() + + " is not supported!"); + } + /** * Make a new (scratch) batch, which is exactly "like" the batch provided, except that it's empty * @param batch the batch to imitate @@ -541,23 +618,8 @@ public static StandardStructObjectInspector convertToStandardStructObjectInspect public static VectorizedRowBatch makeLike(VectorizedRowBatch batch) throws HiveException { VectorizedRowBatch newBatch = new VectorizedRowBatch(batch.numCols); for (int i = 0; i < batch.numCols; i++) { - ColumnVector colVector = batch.cols[i]; - if (colVector != null) { - ColumnVector newColVector; - if (colVector instanceof LongColumnVector) { - newColVector = new LongColumnVector(); - } else if (colVector instanceof DoubleColumnVector) { - newColVector = new DoubleColumnVector(); - } else if (colVector instanceof BytesColumnVector) { - newColVector = new BytesColumnVector(); - } else if (colVector instanceof DecimalColumnVector) { - DecimalColumnVector decColVector = (DecimalColumnVector) colVector; - newColVector = new DecimalColumnVector(decColVector.precision, decColVector.scale); - } else { - throw new HiveException("Column vector class " + colVector.getClass().getName() + - " is not supported!"); - } - newBatch.cols[i] = newColVector; + if (batch.cols[i] != null) { + newBatch.cols[i] = cloneColumnVector(batch.cols[i]); newBatch.cols[i].init(); } } diff --git ql/src/test/queries/clientpositive/vector_complex.q ql/src/test/queries/clientpositive/vector_complex.q new file mode 100644 index 0000000..0190b4e --- /dev/null +++ ql/src/test/queries/clientpositive/vector_complex.q @@ -0,0 +1,71 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.explain.user=false; + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts 
timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k; + +create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC; + +INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; + +create temporary table temptable1 ( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + vc varchar(50), + ch char(50), + ts timestamp, + dt date, + ar array, + st struct, + ma map + ) stored as orc; + +explain +insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics'; +explain +select count(*) from temptable1; + +insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics'; +select count(*) from temptable1; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vector_complex.q.out ql/src/test/results/clientpositive/tez/vector_complex.q.out new file mode 100644 index 0000000..a9c14ab --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_complex.q.out @@ -0,0 +1,296 @@ +PREHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc 
decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2korc +PREHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: default@vectortab2korc +POSTHOOK: Lineage: vectortab2korc.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.d 
SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dc SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dc, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dt SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.f SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s2, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] +PREHOOK: query: create temporary table temptable1 ( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + vc varchar(50), + ch char(50), + ts timestamp, + dt date, + ar array, + st struct, + ma map + ) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@temptable1 +POSTHOOK: query: create temporary table temptable1 ( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + vc varchar(50), + ch char(50), + ts timestamp, + dt date, + ar array, + st struct, + ma map + ) stored as orc +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@temptable1 +PREHOOK: query: explain +insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics' +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s = 'mathematics') (type: boolean) + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), dc (type: decimal(38,18)), bo (type: boolean), 'mathematics' (type: string), 'mathematics' (type: varchar(50)), 'mathematics ' (type: char(50)), ts (type: timestamp), dt (type: date), array(i,(i + 1)) (type: array), struct('mathematics',i) (type: struct), map('mathematics':i) (type: map) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.temptable1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.temptable1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: explain +select count(*) from temptable1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from temptable1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: temptable1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics' +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2korc +PREHOOK: Output: default@temptable1 +POSTHOOK: query: insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2korc +POSTHOOK: Output: default@temptable1 +POSTHOOK: Lineage: temptable1.ar EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: temptable1.b SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: temptable1.bo SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: temptable1.ch EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: temptable1.d SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: temptable1.dc SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:dc, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: temptable1.dt SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: temptable1.f SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: temptable1.i SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: temptable1.ma EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), 
(vectortab2korc)vectortab2korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: temptable1.s SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: temptable1.si SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: temptable1.st EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), (vectortab2korc)vectortab2korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: temptable1.t SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: temptable1.ts SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: temptable1.vc EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: select count(*) from temptable1 +PREHOOK: type: QUERY +PREHOOK: Input: default@temptable1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from temptable1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@temptable1 +#### A masked pattern was here #### +75 diff --git ql/src/test/results/clientpositive/vector_complex.q.out ql/src/test/results/clientpositive/vector_complex.q.out new file mode 100644 index 0000000..889bc6a --- /dev/null +++ ql/src/test/results/clientpositive/vector_complex.q.out @@ -0,0 +1,316 @@ +PREHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + 
s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2korc +PREHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: default@vectortab2korc +POSTHOOK: Lineage: vectortab2korc.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.d SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dc SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dc, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dt SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.f SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s2, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] +PREHOOK: query: create temporary table temptable1 ( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + vc varchar(50), + ch char(50), + ts timestamp, + dt date, + ar array, + st struct, + ma map + ) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@temptable1 +POSTHOOK: query: create temporary table temptable1 ( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + vc varchar(50), + ch char(50), + ts timestamp, + dt date, + ar array, + st struct, + ma map + ) stored as orc +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@temptable1 +PREHOOK: query: explain +insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics' +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s = 'mathematics') (type: boolean) + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), dc (type: decimal(38,18)), bo (type: boolean), 'mathematics' (type: string), 'mathematics' (type: varchar(50)), 'mathematics ' (type: char(50)), ts (type: timestamp), dt (type: date), array(i,(i + 1)) (type: array), struct('mathematics',i) (type: struct), map('mathematics':i) (type: map) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.temptable1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.temptable1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: explain +select count(*) from temptable1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from temptable1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: temptable1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: 
vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics' +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2korc +PREHOOK: Output: default@temptable1 +POSTHOOK: query: insert overwrite table temptable1 + select t, si, i, b, f, d, dc, bo, s, s, s, ts, dt, + array(i, i+1), struct(s, i), map(s, i) from vectortab2korc where s='mathematics' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2korc +POSTHOOK: Output: default@temptable1 +POSTHOOK: Lineage: temptable1.ar EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: temptable1.b SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: temptable1.bo SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: temptable1.ch EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: temptable1.d SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: temptable1.dc SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:dc, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: temptable1.dt SIMPLE 
[(vectortab2korc)vectortab2korc.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: temptable1.f SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: temptable1.i SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: temptable1.ma EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), (vectortab2korc)vectortab2korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: temptable1.s SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: temptable1.si SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: temptable1.st EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), (vectortab2korc)vectortab2korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: temptable1.t SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: temptable1.ts SIMPLE [(vectortab2korc)vectortab2korc.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: temptable1.vc EXPRESSION [(vectortab2korc)vectortab2korc.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: select count(*) from temptable1 +PREHOOK: type: QUERY +PREHOOK: Input: default@temptable1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from temptable1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@temptable1 +#### A masked pattern was here #### +75