diff --git itests/qtest/testconfiguration.properties itests/qtest/testconfiguration.properties index 6a3ee1d..8f0262d 100644 --- itests/qtest/testconfiguration.properties +++ itests/qtest/testconfiguration.properties @@ -1,5 +1,5 @@ minimr.query.files=stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q,empty_dir_in_table.q,temp_table_external.q minimr.query.negative.files=cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q minitez.query.files=tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q,tez_join_hash.q -minitez.query.files.shared=cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q 
+minitez.query.files.shared=cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q,vector_data_types.q beeline.positive.exclude=add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,t
ransform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 064b94e..16454e7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -123,6 +124,7 @@ private static void allocateColumnVector(StructObjectInspector oi, case DOUBLE: cvList.add(new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); break; + case BINARY: case STRING: cvList.add(new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); break; @@ -237,7 +239,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, // float/double. String types have no default value for null. switch (poi.getPrimitiveCategory()) { case BOOLEAN: { - LongColumnVector lcv = (LongColumnVector) batch.cols[off+i]; + LongColumnVector lcv = (LongColumnVector) batch.cols[off + i]; if (writableCol != null) { lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 
1 : 0; lcv.isNull[rowIndex] = false; @@ -248,7 +250,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } break; case BYTE: { - LongColumnVector lcv = (LongColumnVector) batch.cols[off+i]; + LongColumnVector lcv = (LongColumnVector) batch.cols[off + i]; if (writableCol != null) { lcv.vector[rowIndex] = ((ByteWritable) writableCol).get(); lcv.isNull[rowIndex] = false; @@ -259,7 +261,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } break; case SHORT: { - LongColumnVector lcv = (LongColumnVector) batch.cols[off+i]; + LongColumnVector lcv = (LongColumnVector) batch.cols[off + i]; if (writableCol != null) { lcv.vector[rowIndex] = ((ShortWritable) writableCol).get(); lcv.isNull[rowIndex] = false; @@ -270,7 +272,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } break; case INT: { - LongColumnVector lcv = (LongColumnVector) batch.cols[off+i]; + LongColumnVector lcv = (LongColumnVector) batch.cols[off + i]; if (writableCol != null) { lcv.vector[rowIndex] = ((IntWritable) writableCol).get(); lcv.isNull[rowIndex] = false; @@ -281,7 +283,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } break; case LONG: { - LongColumnVector lcv = (LongColumnVector) batch.cols[off+i]; + LongColumnVector lcv = (LongColumnVector) batch.cols[off + i]; if (writableCol != null) { lcv.vector[rowIndex] = ((LongWritable) writableCol).get(); lcv.isNull[rowIndex] = false; @@ -292,7 +294,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } break; case DATE: { - LongColumnVector lcv = (LongColumnVector) batch.cols[off+i]; + LongColumnVector lcv = (LongColumnVector) batch.cols[off + i]; if (writableCol != null) { lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays(); lcv.isNull[rowIndex] = false; @@ -303,7 +305,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } break; case FLOAT: { - DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off+i]; + DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i]; if (writableCol != null) { dcv.vector[rowIndex] = ((FloatWritable) writableCol).get(); dcv.isNull[rowIndex] = false; @@ -314,7 +316,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } break; case DOUBLE: { - DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off+i]; + DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i]; if (writableCol != null) { dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); dcv.isNull[rowIndex] = false; @@ -325,7 +327,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } break; case TIMESTAMP: { - LongColumnVector lcv = (LongColumnVector) batch.cols[off+i]; + LongColumnVector lcv = (LongColumnVector) batch.cols[off + i]; if (writableCol != null) { Timestamp t = ((TimestampWritable) writableCol).getTimestamp(); lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t); @@ -336,8 +338,27 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } } break; + case BINARY: { + BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i]; + if (writableCol != null) { + bcv.isNull[rowIndex] = false; + BytesWritable bw = (BytesWritable) writableCol; + byte[] bytes = bw.getBytes(); + int start = buffer.getLength(); + int length = bw.getLength(); // not bytes.length: getBytes() may return a padded backing array + try { + buffer.write(bytes, 0, length); + } catch (IOException ioe) { + throw new IllegalStateException("bad write", ioe); + } + bcv.setRef(rowIndex, buffer.getData(), 
start, length); + } else { + setNullColIsNullValue(bcv, rowIndex); + } + } + break; case STRING: { - BytesColumnVector bcv = (BytesColumnVector) batch.cols[off+i]; + BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i]; if (writableCol != null) { bcv.isNull[rowIndex] = false; Text colText = (Text) writableCol; @@ -355,7 +376,7 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } break; case DECIMAL: - DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off+i]; + DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off + i]; if (writableCol != null) { dcv.isNull[rowIndex] = false; HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java index 5291c5c..9669c91 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java @@ -152,13 +152,20 @@ public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspe ByteBuffer b = Text.encode(String.valueOf(dcv.vector[rowIndex])); serializeVectorStream.write(b.array(), 0, b.limit()); break; - case STRING: + case BINARY: { + BytesColumnVector bcv = (BytesColumnVector) batch.cols[k]; + // write only the (start, length) slice: by-reference vectors share one backing buffer + serializeVectorStream.write(bcv.vector[rowIndex], bcv.start[rowIndex], bcv.length[rowIndex]); + } + break; + case STRING: { BytesColumnVector bcv = (BytesColumnVector) batch.cols[k]; LazyUtils.writeEscaped(serializeVectorStream, bcv.vector[rowIndex], bcv.start[rowIndex], bcv.length[rowIndex], serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams .getNeedsEscape()); + } break; case TIMESTAMP: LongColumnVector tcv = (LongColumnVector) batch.cols[k]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 152b817..2536817 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -278,7 +278,7 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi; // Vectorization currently only supports the following data types: - // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP, + // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, TIMESTAMP, // DATE and DECIMAL switch (poi.getPrimitiveCategory()) { case BOOLEAN: @@ -294,6 +294,7 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException case DOUBLE: result.cols[j] = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); break; + case BINARY: case STRING: result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); break; @@ -404,7 +405,7 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti lcv.isNull[0] = true; lcv.isRepeating = true; } else { - lcv.fill((Boolean)value == true ? 1 : 0); + lcv.fill((Boolean) value == true ? 
1 : 0); lcv.isNull[0] = false; } } @@ -417,7 +418,7 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti lcv.isNull[0] = true; lcv.isRepeating = true; } else { - lcv.fill((Byte)value); + lcv.fill((Byte) value); lcv.isNull[0] = false; } } @@ -430,7 +431,7 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti lcv.isNull[0] = true; lcv.isRepeating = true; } else { - lcv.fill((Short)value); + lcv.fill((Short) value); lcv.isNull[0] = false; } } @@ -443,7 +444,7 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti lcv.isNull[0] = true; lcv.isRepeating = true; } else { - lcv.fill((Integer)value); + lcv.fill((Integer) value); lcv.isNull[0] = false; } } @@ -456,7 +457,7 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti lcv.isNull[0] = true; lcv.isRepeating = true; } else { - lcv.fill((Long)value); + lcv.fill((Long) value); lcv.isNull[0] = false; } } @@ -469,7 +470,7 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti lcv.isNull[0] = true; lcv.isRepeating = true; } else { - lcv.fill(((Date)value).getTime()); + lcv.fill(((Date) value).getTime()); lcv.isNull[0] = false; } } @@ -521,17 +522,31 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti dv.isNull[0] = true; dv.isRepeating = true; } else { - HiveDecimal hd = (HiveDecimal)(value); - dv.vector[0] = new Decimal128(hd.toString(), (short)hd.scale()); + HiveDecimal hd = (HiveDecimal) value; + dv.vector[0] = new Decimal128(hd.toString(), (short) hd.scale()); dv.isRepeating = true; dv.isNull[0] = false; } } break; - + + case BINARY: { + BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex]; + byte[] bytes = (byte[]) value; + if (bytes == null) { + bcv.noNulls = false; + bcv.isNull[0] = true; + bcv.isRepeating = true; + } else { + bcv.fill(bytes); + bcv.isNull[0] = false; + } + } + break; + case STRING: { BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex]; - String sVal = (String)value; + String sVal = (String) value; if (sVal == null) { bcv.noNulls = false; bcv.isNull[0] = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 268eee5..f5023bb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -910,8 +910,11 @@ void skipRows(long items) throws IOException { private InStream stream; private IntegerReader lengths = null; + private final LongColumnVector scratchlcv; + BinaryTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); + scratchlcv = new LongColumnVector(); } @Override @@ -969,8 +972,18 @@ Object next(Object previous) throws IOException { @Override Object nextVector(Object previousVector, long batchSize) throws IOException { - throw new UnsupportedOperationException( - "NextBatch is not supported operation for Binary type"); + BytesColumnVector result = null; + if (previousVector == null) { + result = new BytesColumnVector(); + } else { + result = (BytesColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + BytesColumnVectorUtil.setRefToOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); + return result; } @Override @@ -1361,6 +1374,66 @@ void skipRows(long items) throws IOException { } } + private static class BytesColumnVectorUtil { + // 
This method has the common code for reading in bytes into a BytesColumnVector. + // It is used by the BINARY, STRING, CHAR, VARCHAR types. + public static void setRefToOrcByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv, + BytesColumnVector result, long batchSize) throws IOException { + + // Read lengths + scratchlcv.isNull = result.isNull; // Notice we are replacing the isNull vector here... + lengths.nextVector(scratchlcv, batchSize); + int totalLength = 0; + if (!scratchlcv.isRepeating) { + for (int i = 0; i < batchSize; i++) { + if (!scratchlcv.isNull[i]) { + totalLength += (int) scratchlcv.vector[i]; + } + } + } else { + if (!scratchlcv.isNull[0]) { + totalLength = (int) (batchSize * scratchlcv.vector[0]); + } + } + + // Read all the strings for this batch + byte[] allBytes = new byte[totalLength]; + int offset = 0; + int len = totalLength; + while (len > 0) { + int bytesRead = stream.read(allBytes, offset, len); + if (bytesRead < 0) { + throw new EOFException("Can't finish byte read from " + stream); + } + len -= bytesRead; + offset += bytesRead; + } + + // Too expensive to figure out 'repeating' by comparisons. + result.isRepeating = false; + offset = 0; + if (!scratchlcv.isRepeating) { + for (int i = 0; i < batchSize; i++) { + if (!scratchlcv.isNull[i]) { + result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]); + offset += scratchlcv.vector[i]; + } else { + result.setRef(i, allBytes, 0, 0); + } + } + } else { + for (int i = 0; i < batchSize; i++) { + if (!scratchlcv.isNull[i]) { + result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]); + offset += scratchlcv.vector[0]; + } else { + result.setRef(i, allBytes, 0, 0); + } + } + } + } + } + /** * A reader for string columns that are direct encoded in the current * stripe. @@ -1443,57 +1516,7 @@ Object nextVector(Object previousVector, long batchSize) throws IOException { // Read present/isNull stream super.nextVector(result, batchSize); - // Read lengths - scratchlcv.isNull = result.isNull; - lengths.nextVector(scratchlcv, batchSize); - int totalLength = 0; - if (!scratchlcv.isRepeating) { - for (int i = 0; i < batchSize; i++) { - if (!scratchlcv.isNull[i]) { - totalLength += (int) scratchlcv.vector[i]; - } - } - } else { - if (!scratchlcv.isNull[0]) { - totalLength = (int) (batchSize * scratchlcv.vector[0]); - } - } - - //Read all the strings for this batch - byte[] allBytes = new byte[totalLength]; - int offset = 0; - int len = totalLength; - while (len > 0) { - int bytesRead = stream.read(allBytes, offset, len); - if (bytesRead < 0) { - throw new EOFException("Can't finish byte read from " + stream); - } - len -= bytesRead; - offset += bytesRead; - } - - // Too expensive to figure out 'repeating' by comparisons. 
- result.isRepeating = false; - offset = 0; - if (!scratchlcv.isRepeating) { - for (int i = 0; i < batchSize; i++) { - if (!scratchlcv.isNull[i]) { - result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]); - offset += scratchlcv.vector[i]; - } else { - result.setRef(i, allBytes, 0, 0); - } - } - } else { - for (int i = 0; i < batchSize; i++) { - if (!scratchlcv.isNull[i]) { - result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]); - offset += scratchlcv.vector[0]; - } else { - result.setRef(i, allBytes, 0, 0); - } - } - } + BytesColumnVectorUtil.setRefToOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); return result; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 7c7f14b..9ad4648 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -133,6 +133,7 @@ public Vectorizer() { patternBuilder.append("|short"); patternBuilder.append("|timestamp"); patternBuilder.append("|boolean"); + patternBuilder.append("|binary"); patternBuilder.append("|string"); patternBuilder.append("|byte"); patternBuilder.append("|float"); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java index dd54aed..f7fea17 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -285,6 +286,15 @@ void ValidateRowBatch(VectorizedRowBatch batch) throws IOException, SerDeExcepti Assert.assertEquals(true, dcv.vector[i] == ((DoubleWritable) writableCol).get()); } break; + case BINARY: { + BytesColumnVector bcv = (BytesColumnVector) batch.cols[j]; + BytesWritable colBinary = (BytesWritable) writableCol; + BytesWritable batchBinary = (BytesWritable) bcv.getWritableObject(i); + // BytesWritable.equals() compares the valid byte contents; + // byte[].equals() would only test reference equality. + Assert.assertEquals(true, colBinary.equals(batchBinary)); + } + break; case STRING: { BytesColumnVector bcv = (BytesColumnVector) batch.cols[j]; Text colText = (Text) writableCol; diff --git ql/src/test/queries/clientpositive/vector_data_types.q ql/src/test/queries/clientpositive/vector_data_types.q new file mode 100644 index 0000000..4c38968 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_data_types.q @@ -0,0 +1,47 @@ + +DROP TABLE over1k; +DROP TABLE over1korc; + +-- data setup +CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k; + +CREATE TABLE over1korc(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC; + +INSERT INTO TABLE over1korc SELECT * FROM over1k; + +SET 
hive.vectorized.execution.enabled=false; + +EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20; + +SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20; + +SET hive.vectorized.execution.enabled=true; + +EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20; + +SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vector_data_types.q.out ql/src/test/results/clientpositive/tez/vector_data_types.q.out new file mode 100644 index 0000000..ba86503 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_data_types.q.out @@ -0,0 +1,258 @@ +PREHOOK: query: DROP TABLE over1k +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE over1k +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE over1korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE over1korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- data setup +CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k +PREHOOK: query: CREATE TABLE over1korc(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE over1korc(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1korc +PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +PREHOOK: Output: default@over1korc +POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@over1korc +POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: over1korc.f SIMPLE 
[(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] +PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1korc +#### A masked pattern was here #### +POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1korc +#### A masked pattern was here 
#### +108 301 65536 4294967357 90.05 17.59 true ethan johnson 2013-03-01 09:11:58.703271 75.7 undecided +118 497 65536 4294967381 50.32 12.72 false david nixon 2013-03-01 09:11:58.703285 83.48 values clariffication +18 280 65536 4294967320 32.92 45.94 false holly white 2013-03-01 09:11:58.703086 58.86 topology +69 489 65536 4294967404 33.52 17.99 false oscar ichabod 2013-03-01 09:11:58.703247 32.68 topology +27 405 65536 4294967508 82.24 29.41 true oscar ovid 2013-03-01 09:11:58.703166 16.85 biology +42 495 65536 4294967431 43.57 46.81 false tom johnson 2013-03-01 09:11:58.703245 62.25 american history +NULL 409 65536 4294967490 46.97 25.92 false fred miller 2013-03-01 09:11:58.703116 33.45 history +59 431 65537 4294967326 11.34 2.9 true oscar robinson 2013-03-01 09:11:58.703161 81.04 zync studies +63 458 65537 4294967463 21.94 49.71 true fred van buren 2013-03-01 09:11:58.703278 99.34 wind surfing +121 355 65537 4294967437 85.9 10.99 true calvin brown 2013-03-01 09:11:58.703254 4.49 yard duty +72 267 65537 4294967460 55.2 42.89 false oscar carson 2013-03-01 09:11:58.703153 51.91 topology +22 264 65537 4294967419 8.07 10.71 false david xylophone 2013-03-01 09:11:58.703136 11.87 undecided +101 283 65538 4294967527 84.03 27.29 true yuri brown 2013-03-01 09:11:58.703306 44.24 biology +109 376 65538 4294967453 59.61 35.62 true priscilla xylophone 2013-03-01 09:11:58.703286 9.53 study skills +83 353 65538 4294967339 0.08 23.91 true holly thompson 2013-03-01 09:11:58.703157 30.27 quiet hour +29 331 65539 4294967420 73.18 28.96 true ethan brown 2013-03-01 09:11:58.703094 58.85 zync studies +70 430 65539 4294967534 18.89 43.84 true tom carson 2013-03-01 09:11:58.703182 21.93 joggying +120 331 65539 4294967324 88.02 40.94 true holly nixon 2013-03-01 09:11:58.703262 96.64 yard duty +71 298 65540 4294967510 91.63 9.6 false gabriella zipper 2013-03-01 09:11:58.70314 68.14 geology +114 385 65541 4294967458 73.48 34.97 true oscar quirinius 2013-03-01 09:11:58.703143 72.33 xylophone band +PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select 
Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1korc +#### A masked pattern was here #### +POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1korc +#### A masked pattern was here #### +108 301 65536 4294967357 90.05 17.59 true ethan johnson 1860-11-12 20:05:55.011470936 75.7 undecided +118 497 65536 4294967381 50.32 12.72 false david nixon 1860-11-12 20:05:55.011484936 83.48 values clariffication +18 280 65536 4294967320 32.92 45.94 false holly white 1860-11-12 20:05:55.011285936 58.86 topology +69 489 65536 4294967404 33.52 17.99 false oscar ichabod 1860-11-12 20:05:55.011446936 32.68 topology +27 405 65536 4294967508 82.24 29.41 true oscar ovid 1860-11-12 20:05:55.011365936 16.85 biology +42 495 65536 4294967431 43.57 46.81 false tom johnson 1860-11-12 20:05:55.011444936 62.25 american history +NULL 409 65536 4294967490 46.97 25.92 false fred miller 1860-11-12 20:05:55.011315936 33.45 history +59 431 65537 4294967326 11.34 2.9 true oscar robinson 1860-11-12 20:05:55.011360936 81.04 zync studies +63 458 65537 4294967463 21.94 49.71 true fred van buren 1860-11-12 20:05:55.011477936 99.34 wind surfing +121 355 65537 4294967437 85.9 10.99 true calvin brown 1860-11-12 20:05:55.011453936 4.49 yard duty +72 267 65537 4294967460 55.2 42.89 false oscar carson 1860-11-12 20:05:55.011352936 51.91 topology +22 264 65537 4294967419 8.07 10.71 false david xylophone 1860-11-12 20:05:55.011335936 11.87 undecided +101 283 65538 4294967527 84.03 27.29 true yuri brown 1860-11-12 20:05:55.011505936 44.24 biology +109 376 65538 4294967453 59.61 35.62 true priscilla xylophone 1860-11-12 20:05:55.011485936 9.53 study skills +83 353 65538 4294967339 0.08 23.91 true holly thompson 1860-11-12 20:05:55.011356936 30.27 quiet hour +29 331 65539 4294967420 73.18 28.96 true ethan brown 1860-11-12 20:05:55.011293936 58.85 zync studies +70 430 65539 4294967534 18.89 43.84 true tom carson 1860-11-12 20:05:55.011381936 21.93 joggying +120 331 65539 4294967324 88.02 40.94 true holly nixon 1860-11-12 20:05:55.011461936 96.64 yard duty +71 298 65540 4294967510 91.63 9.6 false gabriella zipper 1860-11-12 20:05:55.011339936 68.14 geology +114 385 65541 4294967458 73.48 34.97 true oscar quirinius 1860-11-12 20:05:55.011342936 72.33 xylophone band diff --git 
ql/src/test/results/clientpositive/vector_data_types.q.out ql/src/test/results/clientpositive/vector_data_types.q.out new file mode 100644 index 0000000..007f4e8 --- /dev/null +++ ql/src/test/results/clientpositive/vector_data_types.q.out @@ -0,0 +1,246 @@ +PREHOOK: query: DROP TABLE over1k +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE over1k +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE over1korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE over1korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- data setup +CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k +PREHOOK: query: CREATE TABLE over1korc(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE over1korc(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1korc +PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +PREHOOK: Output: default@over1korc +POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@over1korc +POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: 
over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] +PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1korc +#### A masked pattern was here #### +POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1korc +#### A masked pattern was here #### +108 301 65536 4294967357 90.05 17.59 true ethan johnson 2013-03-01 09:11:58.703271 75.7 undecided +118 497 65536 4294967381 50.32 12.72 false david nixon 2013-03-01 09:11:58.703285 83.48 values clariffication +18 280 65536 4294967320 32.92 45.94 false holly white 2013-03-01 09:11:58.703086 58.86 topology +69 489 65536 4294967404 33.52 17.99 false oscar ichabod 2013-03-01 09:11:58.703247 32.68 topology +27 405 65536 4294967508 82.24 29.41 true oscar ovid 2013-03-01 09:11:58.703166 16.85 biology +42 495 65536 4294967431 43.57 46.81 false tom johnson 2013-03-01 09:11:58.703245 62.25 american history +NULL 
409 65536 4294967490 46.97 25.92 false fred miller 2013-03-01 09:11:58.703116 33.45 history +59 431 65537 4294967326 11.34 2.9 true oscar robinson 2013-03-01 09:11:58.703161 81.04 zync studies +63 458 65537 4294967463 21.94 49.71 true fred van buren 2013-03-01 09:11:58.703278 99.34 wind surfing +121 355 65537 4294967437 85.9 10.99 true calvin brown 2013-03-01 09:11:58.703254 4.49 yard duty +72 267 65537 4294967460 55.2 42.89 false oscar carson 2013-03-01 09:11:58.703153 51.91 topology +22 264 65537 4294967419 8.07 10.71 false david xylophone 2013-03-01 09:11:58.703136 11.87 undecided +101 283 65538 4294967527 84.03 27.29 true yuri brown 2013-03-01 09:11:58.703306 44.24 biology +109 376 65538 4294967453 59.61 35.62 true priscilla xylophone 2013-03-01 09:11:58.703286 9.53 study skills +83 353 65538 4294967339 0.08 23.91 true holly thompson 2013-03-01 09:11:58.703157 30.27 quiet hour +29 331 65539 4294967420 73.18 28.96 true ethan brown 2013-03-01 09:11:58.703094 58.85 zync studies +70 430 65539 4294967534 18.89 43.84 true tom carson 2013-03-01 09:11:58.703182 21.93 joggying +120 331 65539 4294967324 88.02 40.94 true holly nixon 2013-03-01 09:11:58.703262 96.64 yard duty +71 298 65540 4294967510 91.63 9.6 false gabriella zipper 2013-03-01 09:11:58.70314 68.14 geology +114 385 65541 4294967458 73.48 34.97 true oscar quirinius 2013-03-01 09:11:58.703143 72.33 xylophone band +PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data 
size: 5920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1korc +#### A masked pattern was here #### +POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY i LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1korc +#### A masked pattern was here #### +108 301 65536 4294967357 90.05 17.59 true ethan johnson 1860-11-12 20:05:55.011470936 75.7 undecided +118 497 65536 4294967381 50.32 12.72 false david nixon 1860-11-12 20:05:55.011484936 83.48 values clariffication +18 280 65536 4294967320 32.92 45.94 false holly white 1860-11-12 20:05:55.011285936 58.86 topology +69 489 65536 4294967404 33.52 17.99 false oscar ichabod 1860-11-12 20:05:55.011446936 32.68 topology +27 405 65536 4294967508 82.24 29.41 true oscar ovid 1860-11-12 20:05:55.011365936 16.85 biology +42 495 65536 4294967431 43.57 46.81 false tom johnson 1860-11-12 20:05:55.011444936 62.25 american history +NULL 409 65536 4294967490 46.97 25.92 false fred miller 1860-11-12 20:05:55.011315936 33.45 history +59 431 65537 4294967326 11.34 2.9 true oscar robinson 1860-11-12 20:05:55.011360936 81.04 zync studies +63 458 65537 4294967463 21.94 49.71 true fred van buren 1860-11-12 20:05:55.011477936 99.34 wind surfing +121 355 65537 4294967437 85.9 10.99 true calvin brown 1860-11-12 20:05:55.011453936 4.49 yard duty +72 267 65537 4294967460 55.2 42.89 false oscar carson 1860-11-12 20:05:55.011352936 51.91 topology +22 264 65537 4294967419 8.07 10.71 false david xylophone 1860-11-12 20:05:55.011335936 11.87 undecided +101 283 65538 4294967527 84.03 27.29 true yuri brown 1860-11-12 20:05:55.011505936 44.24 biology +109 376 65538 4294967453 59.61 35.62 true priscilla xylophone 1860-11-12 20:05:55.011485936 9.53 study skills +83 353 65538 4294967339 0.08 23.91 true holly thompson 1860-11-12 20:05:55.011356936 30.27 quiet hour +29 331 65539 4294967420 73.18 28.96 true ethan brown 1860-11-12 20:05:55.011293936 58.85 zync studies +70 430 65539 4294967534 18.89 43.84 true tom carson 1860-11-12 20:05:55.011381936 21.93 joggying +120 331 65539 4294967324 88.02 40.94 true holly nixon 1860-11-12 20:05:55.011461936 96.64 yard duty +71 298 65540 4294967510 91.63 9.6 false gabriella zipper 1860-11-12 20:05:55.011339936 68.14 geology +114 385 65541 4294967458 73.48 34.97 true oscar quirinius 1860-11-12 20:05:55.011342936 72.33 xylophone band
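
Note (not part of the patch): the pattern this change reuses throughout — in BinaryTreeReader.nextVector, BytesColumnVectorUtil.setRefToOrcByteArrays, and the serializer fix above — is BytesColumnVector's by-reference storage. Each row is a (buffer, start, length) triple installed with setRef(), so a reader can decode a whole batch into one contiguous byte[] with no per-row copies. Below is a minimal standalone sketch of that pattern; it uses the real hive-exec BytesColumnVector and VectorizedRowBatch classes, but the data and the class name BinaryByRefSketch are invented for illustration.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BinaryByRefSketch {
  public static void main(String[] args) {
    // One column vector sized for a default batch.
    BytesColumnVector bcv = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);

    // As in setRefToOrcByteArrays: lengths are read first, then all values
    // for the batch are read into a single contiguous buffer.
    byte[] allBytes = {'a', 'b', 'c', 'd', 'e'};
    int[] lengths = {2, 3};

    int offset = 0;
    for (int row = 0; row < lengths.length; row++) {
      // setRef() records (buffer, start, length) without copying bytes, which
      // is why consumers must honor start[row]/length[row] instead of writing
      // the whole backing array.
      bcv.setRef(row, allBytes, offset, lengths[row]);
      offset += lengths[row];
    }

    // Row 0 references "ab"; row 1 references "cde".
    System.out.println(new String(bcv.vector[1], bcv.start[1], bcv.length[1]));
  }
}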