diff --git itests/qtest/testconfiguration.properties itests/qtest/testconfiguration.properties
index 2d3f331..0ca060f 100644
--- itests/qtest/testconfiguration.properties
+++ itests/qtest/testconfiguration.properties
@@ -1,5 +1,5 @@
 minimr.query.files=stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q,empty_dir_in_table.q,temp_table_external.q
 minimr.query.negative.files=cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q
 minitez.query.files=tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q,tez_join_hash.q
-minitez.query.files.shared=orc_merge1.q,orc_merge2.q,orc_merge3.q,orc_merge4.q,alter_merge_orc.q,alter_merge_2_orc.q,alter_merge_stats_orc.q,cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q,vector_string_concat.q,vector_decimal_aggregate.q,vector_left_outer_join.q,vectorization_12.q,vectorization_13.q,vectorization_14.q,vectorization_9.q,vectorization_part_project.q,vectorization_short_regress.q,vectorized_mapjoin.q,vectorized_nested_mapjoin.q,vectorized_shufflejoin.q,vectorized_timestamp_funcs.q,vector_data_types.q
+minitez.query.files.shared=orc_merge1.q,orc_merge2.q,orc_merge3.q,orc_merge4.q,alter_merge_orc.q,alter_merge_2_orc.q,alter_merge_stats_orc.q,cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q,vector_string_concat.q,vector_decimal_aggregate.q,vector_left_outer_join.q,vectorization_12.q,vectorization_13.q,vectorization_14.q,vectorization_9.q,vectorization_part_project.q,vectorization_short_regress.q,vectorized_mapjoin.q,vectorized_nested_mapjoin.q,vectorized_shufflejoin.q,vectorized_timestamp_funcs.q,vector_data_types.q,vector_char_2.q,vector_char_simple.q,vector_string_temp.q,vector_varchar_simple.q
 beeline.positive.exclude=add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q
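The first source change below widens VectorizationContext.isStringFamily() so that CHAR and VARCHAR type names are treated as string-family types; the regex has to be a prefix match because the declared length is part of the type name (for example varchar(20)). A minimal standalone sketch of that classification, with the pattern and method body mirroring the hunk and the main() driver added purely for illustration:

    import java.util.regex.Pattern;

    public class TypeFamilyCheck {
        // Same pattern the patch adds to VectorizationContext.
        static final Pattern CHAR_VARCHAR =
            Pattern.compile("char.*|varchar.*", Pattern.CASE_INSENSITIVE);

        static boolean isStringFamily(String resultType) {
            return resultType.equalsIgnoreCase("string")
                || CHAR_VARCHAR.matcher(resultType).matches();
        }

        public static void main(String[] args) {
            System.out.println(isStringFamily("string"));        // true
            System.out.println(isStringFamily("CHAR(10)"));       // true
            System.out.println(isStringFamily("varchar(20)"));    // true
            System.out.println(isStringFamily("decimal(10,2)"));  // false
        }
    }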
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 535e4b3..30c05b5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -122,6 +122,9 @@
   public static final Pattern decimalTypePattern = Pattern.compile("decimal.*",
       Pattern.CASE_INSENSITIVE);
 
+  public static final Pattern charVarcharTypePattern = Pattern.compile("char.*|varchar.*",
+      Pattern.CASE_INSENSITIVE);
+
   //Map column number to type
   private final OutputColumnManager ocm;
 
@@ -545,6 +548,12 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException
       case STRING:
         udfClass = new UDFToString();
         break;
+      case CHAR:
+        genericUdf = new GenericUDFToChar();
+        break;
+      case VARCHAR:
+        genericUdf = new GenericUDFToVarchar();
+        break;
       case BOOLEAN:
         udfClass = new UDFToBoolean();
         break;
@@ -589,8 +598,8 @@ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) {
       Class udfClass = bridge.getUdfClass();
       if (udfClass.equals(UDFHex.class)
           || udfClass.equals(UDFConv.class)
-          || isCastToIntFamily(udfClass) && arg0Type(expr).equals("string")
-          || isCastToFloatFamily(udfClass) && arg0Type(expr).equals("string")
+          || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr))
+          || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr))
           || udfClass.equals(UDFToString.class)
               && (arg0Type(expr).equals("timestamp")
                   || arg0Type(expr).equals("double")
@@ -1395,7 +1404,7 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr)
   }
 
   public static boolean isStringFamily(String resultType) {
-    return resultType.equalsIgnoreCase("string");
+    return resultType.equalsIgnoreCase("string") || charVarcharTypePattern.matcher(resultType).matches();
   }
 
   public static boolean isDatetimeFamily(String resultType) {
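The cast cases above hand CHAR and VARCHAR to GenericUDFs (GenericUDFToChar, GenericUDFToVarchar) rather than the legacy UDF bridge because both types carry a declared maximum length that the cast must enforce. A rough JDK-only sketch of those length semantics, not Hive's actual implementation (which lives in HiveChar/HiveVarchar and the cast UDFs): CHAR blank-pads up to the declared length, VARCHAR only truncates.

    public class CharLengthSemantics {
        // CHAR(n): truncate to n, then blank-pad to exactly n characters.
        static String toChar(String s, int maxLength) {
            String t = s.length() > maxLength ? s.substring(0, maxLength) : s;
            return String.format("%-" + maxLength + "s", t);
        }

        // VARCHAR(n): truncate to at most n characters, no padding.
        static String toVarchar(String s, int maxLength) {
            return s.length() > maxLength ? s.substring(0, maxLength) : s;
        }

        public static void main(String[] args) {
            System.out.println("[" + toChar("val_0", 10) + "]");    // [val_0     ]
            System.out.println("[" + toVarchar("val_0", 10) + "]"); // [val_0]
        }
    }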
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index 16454e7..a4ccb07 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -126,6 +126,8 @@ private static void allocateColumnVector(StructObjectInspector oi,
         break;
       case BINARY:
       case STRING:
+      case CHAR:
+      case VARCHAR:
         cvList.add(new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE));
         break;
       case DECIMAL:
@@ -357,7 +359,9 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi,
           }
         }
         break;
-      case STRING: {
+      case STRING:
+      case CHAR:
+      case VARCHAR: {
         BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
         if (writableCol != null) {
           bcv.isNull[rowIndex] = false;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
index 9669c91..5ce7553 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
@@ -158,7 +158,10 @@ public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspe
               serializeVectorStream.write(bytes, 0, bytes.length);
             }
             break;
-          case STRING: {
+          case STRING:
+          case CHAR:
+          case VARCHAR: {
+            // Is it correct to escape CHAR and VARCHAR?
             BytesColumnVector bcv = (BytesColumnVector) batch.cols[k];
             LazyUtils.writeEscaped(serializeVectorStream, bcv.vector[rowIndex],
                 bcv.start[rowIndex],
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 2536817..157e5d1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -278,7 +278,7 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException
       case PRIMITIVE: {
         PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
         // Vectorization currently only supports the following data types:
-        // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, TIMESTAMP,
+        // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, CHAR, VARCHAR, TIMESTAMP,
         // DATE and DECIMAL
         switch (poi.getPrimitiveCategory()) {
           case BOOLEAN:
@@ -296,6 +296,8 @@
             break;
           case BINARY:
           case STRING:
+          case CHAR:
+          case VARCHAR:
             result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
             break;
           case DECIMAL:
@@ -544,7 +546,9 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti
        }
        break;
-      case STRING: {
+      case STRING:
+      case CHAR:
+      case VARCHAR: {
        BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
        String sVal = (String) value;
        if (sVal == null) {
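The three files above all route CHAR and VARCHAR into the BytesColumnVector already used for STRING and BINARY, so no new vector type is introduced. Below is a stripped-down model of that storage layout; the field names follow BytesColumnVector, but this stand-in ignores isRepeating, noNulls, and buffer sharing:

    import java.nio.charset.StandardCharsets;

    // Each row is a (byte[], start, length) triple; a blank-padded CHAR value is just a
    // longer byte run in the same structure a STRING value would occupy.
    public class ToyBytesColumnVector {
        final byte[][] vector;
        final int[] start;
        final int[] length;
        final boolean[] isNull;

        ToyBytesColumnVector(int size) {
            vector = new byte[size][];
            start = new int[size];
            length = new int[size];
            isNull = new boolean[size];
        }

        void setVal(int row, byte[] bytes) {
            vector[row] = bytes;
            start[row] = 0;
            length[row] = bytes.length;
            isNull[row] = false;
        }

        public static void main(String[] args) {
            ToyBytesColumnVector col = new ToyBytesColumnVector(1024);
            col.setVal(0, "val_0     ".getBytes(StandardCharsets.UTF_8)); // char(10), padded
            System.out.println(col.length[0]); // 10
        }
    }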
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
index eeb76d7..f4f32df 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
@@ -28,6 +28,7 @@
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.vector.*;
@@ -46,6 +47,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector;
@@ -414,6 +416,9 @@ public static VectorExpressionWriter genVectorExpressionWritable(
       case STRING:
         return genVectorExpressionWritableString(
             (SettableStringObjectInspector) fieldObjInspector);
+      case CHAR:
+        return genVectorExpressionWritableChar(
+            (SettableHiveCharObjectInspector) fieldObjInspector);
       case VARCHAR:
         return genVectorExpressionWritableVarchar(
             (SettableHiveVarcharObjectInspector) fieldObjInspector);
@@ -569,6 +574,46 @@ public Object initValue(Object ignored) {
     }.init(fieldObjInspector);
   }
 
+  private static VectorExpressionWriter genVectorExpressionWritableChar(
+      SettableHiveCharObjectInspector fieldObjInspector) throws HiveException {
+    return new VectorExpressionWriterBytes() {
+      private Object obj;
+      private Text text;
+
+      public VectorExpressionWriter init(SettableHiveCharObjectInspector objInspector)
+          throws HiveException {
+        super.init(objInspector);
+        this.text = new Text();
+        this.obj = initValue(null);
+        return this;
+      }
+
+      @Override
+      public Object writeValue(byte[] value, int start, int length) throws HiveException {
+        text.set(value, start, length);
+        ((SettableHiveCharObjectInspector) this.objectInspector).set(this.obj, text.toString());
+        return this.obj;
+      }
+
+      @Override
+      public Object setValue(Object field, byte[] value, int start, int length)
+          throws HiveException {
+        if (null == field) {
+          field = initValue(null);
+        }
+        text.set(value, start, length);
+        ((SettableHiveCharObjectInspector) this.objectInspector).set(field, text.toString());
+        return field;
+      }
+
+      @Override
+      public Object initValue(Object ignored) {
+        return ((SettableHiveCharObjectInspector) this.objectInspector)
+            .create(new HiveChar(StringUtils.EMPTY, -1));
+      }
+    }.init(fieldObjInspector);
+  }
+
   private static VectorExpressionWriter genVectorExpressionWritableVarchar(
       SettableHiveVarcharObjectInspector fieldObjInspector) throws HiveException {
     return new VectorExpressionWriterBytes() {
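genVectorExpressionWritableChar above follows the factory's established writer pattern: the Text buffer and the output object are created once in init() and mutated on every row, so the per-row writeValue() path allocates nothing new. A hypothetical JDK-only analog of that shape; the StringBuilder stands in for the reused HiveChar that the settable object inspector updates in place:

    import java.nio.charset.StandardCharsets;

    public class ReusedCharWriter {
        // Created once; every writeValue() call updates and returns the same instance,
        // mirroring how the real writer reuses one output object across rows.
        private final StringBuilder obj = new StringBuilder();

        CharSequence writeValue(byte[] vector, int start, int length) {
            obj.setLength(0);
            obj.append(new String(vector, start, length, StandardCharsets.UTF_8));
            return obj; // callers must consume the value before the next row overwrites it
        }

        public static void main(String[] args) {
            ReusedCharWriter w = new ReusedCharWriter();
            byte[] bytes = "val_0     ".getBytes(StandardCharsets.UTF_8);
            System.out.println(w.writeValue(bytes, 0, bytes.length)); // val_0 (blank-padded)
        }
    }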
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index f5023bb..1e64c41 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -39,6 +39,7 @@
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -1732,6 +1733,15 @@ Object next(Object previous) throws IOException {
       result.enforceMaxLength(maxLength);
       return result;
     }
+
+    @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      // The CHAR data type is stored internally in ORC as a STRING that includes the blank padding.
+      Object object = super.nextVector(previousVector, batchSize);
+
+      // Should we walk the byte arrays and enforce the maximum length, as the next() method above does?
+      return object;
+    }
   }
 
   private static class VarcharTreeReader extends StringTreeReader {
@@ -1760,6 +1770,15 @@ Object next(Object previous) throws IOException {
       result.enforceMaxLength(maxLength);
       return result;
     }
+
+    @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      // The VARCHAR data type is stored internally in ORC as a STRING.
+      Object object = super.nextVector(previousVector, batchSize);
+
+      // Should we walk the byte arrays and enforce the maximum length, as the next() method above does?
+      return object;
+    }
   }
 
   private static class StructTreeReader extends TreeReader {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index e778ba4..648de70 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -155,6 +155,10 @@ public Vectorizer() {
     // The regex matches only the "decimal" prefix of the type.
     patternBuilder.append("|decimal.*");
 
+    // CHAR and VARCHAR types can be specified with a maximum length.
+    patternBuilder.append("|char.*");
+    patternBuilder.append("|varchar.*");
+
     supportedDataTypesPattern = Pattern.compile(patternBuilder.toString());
 
     supportedGenericUDFs.add(GenericUDFOPPlus.class);
diff --git ql/src/test/queries/clientpositive/vector_char_2.q ql/src/test/queries/clientpositive/vector_char_2.q
new file mode 100644
index 0000000..0828ca1
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_char_2.q
@@ -0,0 +1,49 @@
+SET hive.vectorized.execution.enabled=true;
+drop table char_2;
+
+create table char_2 (
+  key char(10),
+  value char(20)
+) stored as orc;
+
+insert overwrite table char_2 select * from src;
+
+select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5;
+
+explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5;
+
+-- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5;
+
+select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value desc
+limit 5;
+
+explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5;
+
+-- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5;
+
+drop table char_2;
diff --git ql/src/test/queries/clientpositive/vector_char_simple.q ql/src/test/queries/clientpositive/vector_char_simple.q
new file mode 100644
index 0000000..ec46630
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_char_simple.q
@@ -0,0 +1,43 @@
+SET hive.vectorized.execution.enabled=true;
+drop table char_2;
+
+create table char_2 (
+  key char(10),
+  value char(20)
+) stored as orc;
+
+insert overwrite table char_2 select * from src;
+
+select key, value
+from src
+order by key asc
+limit 5;
+
+explain select key, value
+from char_2
+order by key asc
+limit 5;
+
+-- should match the query from src
+select key, value
+from char_2
+order by key asc
+limit 5;
+
+select key, value
+from src
+order by key desc
+limit 5;
+
+explain select key, value
+from char_2
+order by key desc
+limit 5;
+
+-- should match the query from src
+select key, value
+from char_2
+order by key desc
+limit 5;
+
+drop table char_2;
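Each of these .q files runs every query twice, once against src (the plain STRING baseline) and once against the ORC-backed char_2 table, and the golden outputs below expect identical rows; vector_string_temp.q, next, repeats the pattern with STRING columns as a control. The results can agree because uniform blank-padding preserves the relative order of these ASCII values, as a small illustrative check shows (String.format emulating char(20) padding):

    public class PaddedOrderCheck {
        public static void main(String[] args) {
            String a = String.format("%-20s", "val_10");  // char(20)-style padded values
            String b = String.format("%-20s", "val_100");
            // At index 6, ' ' (0x20) sorts before '0' (0x30), so the padded order
            // agrees with the unpadded prefix order.
            System.out.println(a.compareTo(b) < 0);                // true
            System.out.println("val_10".compareTo("val_100") < 0); // true
        }
    }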
diff --git ql/src/test/queries/clientpositive/vector_string_temp.q ql/src/test/queries/clientpositive/vector_string_temp.q
new file mode 100644
index 0000000..0ac71ca
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_string_temp.q
@@ -0,0 +1,49 @@
+SET hive.vectorized.execution.enabled=true;
+drop table char_2;
+
+create table char_2 (
+  key string,
+  value string
+) stored as orc;
+
+insert overwrite table char_2 select * from src;
+
+select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5;
+
+explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5;
+
+-- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5;
+
+select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value desc
+limit 5;
+
+explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5;
+
+-- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5;
+
+drop table char_2;
diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q
new file mode 100644
index 0000000..68d6b09
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_varchar_simple.q
@@ -0,0 +1,43 @@
+SET hive.vectorized.execution.enabled=true;
+drop table char_2;
+
+create table char_2 (
+  key varchar(10),
+  value varchar(20)
+) stored as orc;
+
+insert overwrite table char_2 select * from src;
+
+select key, value
+from src
+order by key asc
+limit 5;
+
+explain select key, value
+from char_2
+order by key asc
+limit 5;
+
+-- should match the query from src
+select key, value
+from char_2
+order by key asc
+limit 5;
+
+select key, value
+from src
+order by key desc
+limit 5;
+
+explain select key, value
+from char_2
+order by key desc
+limit 5;
+
+-- should match the query from src
+select key, value
+from char_2
+order by key desc
+limit 5;
+
+drop table char_2;
diff --git ql/src/test/results/clientpositive/tez/vector_char_2.q.out ql/src/test/results/clientpositive/tez/vector_char_2.q.out
new file mode 100644
index 0000000..35535f6
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_char_2.q.out
@@ -0,0 +1,293 @@
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table char_2 (
+  key char(10),
+  value char(20)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table char_2 (
+  key char(10),
+  value char(20)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_2
+PREHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@char_2
+POSTHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@char_2
+POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: char(20)), key (type: char(10)) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(UDFToInteger(key)), count() + keys: value (type: char(20)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + + Map-reduce partition columns: _col0 (type: char(20)) + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: char(20)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: char(20)), key (type: char(10)) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(UDFToInteger(key)), count() + keys: value (type: char(20)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + + Map-reduce partition columns: _col0 (type: char(20)) + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: char(20)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: - + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/tez/vector_char_simple.q.out ql/src/test/results/clientpositive/tez/vector_char_simple.q.out new file mode 100644 index 0000000..c197d52 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_char_simple.q.out @@ -0,0 +1,231 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value +from src +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: char(10)), value (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 
Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(10)) + sort order: + + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(20)) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select key, value +from src +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: char(10)), value (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(10)) + sort order: - + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(20)) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/tez/vector_string_temp.q.out ql/src/test/results/clientpositive/tez/vector_string_temp.q.out new file mode 100644 index 0000000..342fc94 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_string_temp.q.out @@ -0,0 +1,297 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key string, + value string +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table char_2 ( + key string, + value string +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(UDFToInteger(key)), count() + keys: value (type: 
string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(UDFToInteger(key)), count() + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: - + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out 
ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out new file mode 100644 index 0000000..bf3e424 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out @@ -0,0 +1,231 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key varchar(10), + value varchar(20) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table char_2 ( + key varchar(10), + value varchar(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value +from src +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: varchar(10)), value (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: varchar(20)) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here 
#### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select key, value +from src +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: varchar(10)), value (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: - + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: varchar(20)) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out new file mode 100644 index 0000000..fe1ac33 --- /dev/null +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -0,0 +1,303 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + 
key char(10),
+  value char(20)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table char_2 (
+  key char(10),
+  value char(20)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_2
+PREHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@char_2
+POSTHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@char_2
+POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0	0	3
+val_10	10	1
+val_100	200	2
+val_103	206	2
+val_104	208	2
+PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: char(20)), key (type: char(10))
+              outputColumnNames: value, key
+              Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(UDFToInteger(key)), count()
+                keys: value (type: char(20))
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: char(20))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: char(20))
+                  Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
+          keys: KEY._col0 (type: char(20))
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: char(20))
+              sort order: +
+              Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+val_0	0	3
+val_10	10	1
+val_100	200	2
+val_103	206	2
+val_104	208	2
+PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_98	196	2
+val_97	194	2
+val_96	96	1
+val_95	190	2
+val_92	92	1
+PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: char(20)), key (type: char(10))
+              outputColumnNames: value, key
+              Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(UDFToInteger(key)), count()
+                keys: value (type: char(20))
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: char(20))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: char(20))
+                  Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
+          keys: KEY._col0 (type: char(20))
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: char(20))
+              sort order: -
+              Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+val_98	196	2
+val_97	194	2
+val_96	96	1
+val_95	190	2
+val_92	92	1
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_2
+PREHOOK: Output: default@char_2
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_2
+POSTHOOK: Output: default@char_2
diff --git ql/src/test/results/clientpositive/vector_char_simple.q.out ql/src/test/results/clientpositive/vector_char_simple.q.out
new file mode 100644
index 0000000..fab0776
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_char_simple.q.out
@@ -0,0 +1,221 @@
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table char_2 (
+  key char(10),
+  value char(20)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table char_2 (
+  key char(10),
+  value char(20)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_2
+PREHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@char_2
+POSTHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@char_2
+POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select key, value
+from src
+order by key asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value
+from src
+order by key asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0
+0	val_0
+0	val_0
+10	val_10
+100	val_100
+PREHOOK: query: explain select key, value
+from char_2
+order by key asc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value
+from char_2
+order by key asc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: char(10)), value (type: char(20))
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: char(10))
+                sort order: +
+                Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: char(20))
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20))
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select key, value
+from char_2
+order by key asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select key, value
+from char_2
+order by key asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+0	val_0
+0	val_0
+0	val_0
+10	val_10
+100	val_100
+PREHOOK: query: select key, value
+from src
+order by key desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value
+from src
+order by key desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98	val_98
+98	val_98
+97	val_97
+97	val_97
+96	val_96
+PREHOOK: query: explain select key, value
+from char_2
+order by key desc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value
+from char_2
+order by key desc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: char(10)), value (type: char(20))
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: char(10))
+                sort order: -
+                Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: char(20))
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20))
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select key, value
+from char_2
+order by key desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select key, value
+from char_2
+order by key desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+98	val_98
+98	val_98
+97	val_97
+97	val_97
+96	val_96
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_2
+PREHOOK: Output: default@char_2
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_2
+POSTHOOK: Output: default@char_2
diff --git ql/src/test/results/clientpositive/vector_string_temp.q.out ql/src/test/results/clientpositive/vector_string_temp.q.out
new file mode 100644
index 0000000..0c154c9
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_string_temp.q.out
@@ -0,0 +1,303 @@
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table char_2 (
+  key string,
+  value string
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table char_2 (
+  key string,
+  value string
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_2
+PREHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@char_2
+POSTHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@char_2
+POSTHOOK: Lineage: char_2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0	0	3
+val_10	10	1
+val_100	200	2
+val_103	206	2
+val_104	208	2
+PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string), key (type: string)
+              outputColumnNames: value, key
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(UDFToInteger(key)), count()
+                keys: value (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+val_0	0	3
+val_10	10	1
+val_100	200	2
+val_103	206	2
+val_104	208	2
+PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_98	196	2
+val_97	194	2
+val_96	96	1
+val_95	190	2
+val_92	92	1
+PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string), key (type: string)
+              outputColumnNames: value, key
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(UDFToInteger(key)), count()
+                keys: value (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: -
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+val_98	196	2
+val_97	194	2
+val_96	96	1
+val_95	190	2
+val_92	92	1
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_2
+PREHOOK: Output: default@char_2
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_2
+POSTHOOK: Output: default@char_2
diff --git ql/src/test/results/clientpositive/vector_varchar_simple.q.out ql/src/test/results/clientpositive/vector_varchar_simple.q.out
new file mode 100644
index 0000000..3009a69
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_varchar_simple.q.out
@@ -0,0 +1,221 @@
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table char_2 (
+  key varchar(10),
+  value varchar(20)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table char_2 (
+  key varchar(10),
+  value varchar(20)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_2
+PREHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@char_2
+POSTHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@char_2
+POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select key, value
+from src
+order by key asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value
+from src
+order by key asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0
+0	val_0
+0	val_0
+10	val_10
+100	val_100
+PREHOOK: query: explain select key, value
+from char_2
+order by key asc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value
+from char_2
+order by key asc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: varchar(10)), value (type: varchar(20))
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: varchar(10))
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: varchar(20))
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select key, value
+from char_2
+order by key asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select key, value
+from char_2
+order by key asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+0	val_0
+0	val_0
+0	val_0
+10	val_10
+100	val_100
+PREHOOK: query: select key, value
+from src
+order by key desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value
+from src
+order by key desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98	val_98
+98	val_98
+97	val_97
+97	val_97
+96	val_96
+PREHOOK: query: explain select key, value
+from char_2
+order by key desc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value
+from char_2
+order by key desc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: varchar(10)), value (type: varchar(20))
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: varchar(10))
+                sort order: -
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: varchar(20))
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select key, value
+from char_2
+order by key desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select key, value
+from char_2
+order by key desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+98	val_98
+98	val_98
+97	val_97
+97	val_97
+96	val_96
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_2
+PREHOOK: Output: default@char_2
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_2
+POSTHOOK: Output: default@char_2