diff --git itests/qtest/testconfiguration.properties itests/qtest/testconfiguration.properties
index cf63046..6a3ee1d 100644
--- itests/qtest/testconfiguration.properties
+++ itests/qtest/testconfiguration.properties
@@ -1,5 +1,5 @@
 minimr.query.files=stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q,empty_dir_in_table.q,temp_table_external.q
 minimr.query.negative.files=cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q
 minitez.query.files=tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q,tez_join_hash.q
-minitez.query.files.shared=cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q
+minitez.query.files.shared=cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q
 beeline.positive.exclude=add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q
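The property change above registers the new qfile with the shared MiniTez list, so the same test runs under both the plain CLI driver and the Tez driver. As a hedged convenience (the driver class names below are the ones Hive's itests used in this era; treat the exact invocation as an assumption if the build layout has changed), the test can be run in isolation with:

    cd itests/qtest
    mvn test -Dtest=TestCliDriver -Dqfile=vector_cast_constant.q
    mvn test -Dtest=TestMiniTezCliDriver -Dqfile=vector_cast_constant.q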
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 014ee9e..e13a9e2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1096,7 +1096,7 @@ private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge ud
       List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {
     Class<? extends UDF> cl = udf.getUdfClass();
     if (isCastToIntFamily(cl)) {
-      return getCastToLongExpression(childExpr);
+      return getCastToLongExpression(childExpr, returnType);
     } else if (cl.equals(UDFToBoolean.class)) {
       return getCastToBoolean(childExpr);
     } else if (isCastToFloatFamily(cl)) {
@@ -1167,11 +1167,89 @@ private Decimal128 castConstantToDecimal(Object scalar, TypeInfo type) throws Hi
       d.update(decimalVal.unscaledValue(), (short) scale);
       break;
     default:
-      throw new HiveException("Unsupported type "+typename+" for cast to Decimal128");
+      throw new HiveException("Unsupported type " + typename + " for cast to Decimal128");
     }
     return d;
   }
+
+  private Double castConstantToDouble(Object scalar, TypeInfo type) throws HiveException {
+    PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+    String typename = type.getTypeName();
+    Double d;
+    switch (ptinfo.getPrimitiveCategory()) {
+    case FLOAT:
+      float floatVal = ((Float) scalar).floatValue();
+      d = new Double(floatVal);
+      break;
+    case DOUBLE:
+      double doubleVal = ((Double) scalar).doubleValue();
+      d = new Double(doubleVal);
+      break;
+    case BYTE:
+      byte byteVal = ((Byte) scalar).byteValue();
+      d = new Double(byteVal);
+      break;
+    case SHORT:
+      short shortVal = ((Short) scalar).shortValue();
+      d = new Double(shortVal);
+      break;
+    case INT:
+      int intVal = ((Integer) scalar).intValue();
+      d = new Double(intVal);
+      break;
+    case LONG:
+      long longVal = ((Long) scalar).longValue();
+      d = new Double(longVal);
+      break;
+    case DECIMAL:
+      HiveDecimal decimalVal = (HiveDecimal) scalar;
+      d = new Double(decimalVal.doubleValue());
+      break;
+    default:
+      throw new HiveException("Unsupported type " + typename + " for cast to Double");
+    }
+    return d;
+  }
+
+  private Long castConstantToLong(Object scalar, TypeInfo type) throws HiveException {
+    PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+    String typename = type.getTypeName();
+    Long l;
+    switch (ptinfo.getPrimitiveCategory()) {
+    case FLOAT:
+      float floatVal = ((Float) scalar).floatValue();
+      l = new Long((long) floatVal);
+      break;
+    case DOUBLE:
+      double doubleVal = ((Double) scalar).doubleValue();
+      l = new Long((long) doubleVal);
+      break;
+    case BYTE:
+      byte byteVal = ((Byte) scalar).byteValue();
+      l = new Long(byteVal);
+      break;
+    case SHORT:
+      short shortVal = ((Short) scalar).shortValue();
+      l = new Long(shortVal);
+      break;
+    case INT:
+      int intVal = ((Integer) scalar).intValue();
+      l = new Long(intVal);
+      break;
+    case LONG:
+      long longVal = ((Long) scalar).longValue();
+      l = new Long(longVal);
+      break;
+    case DECIMAL:
+      HiveDecimal decimalVal = (HiveDecimal) scalar;
+      l = new Long(decimalVal.longValue());
+      break;
+    default:
+      throw new HiveException("Unsupported type " + typename + " for cast to Long");
+    }
+    return l;
+  }
+
   private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo returnType)
       throws HiveException {
     String inputType = childExpr.get(0).getTypeString();
@@ -1194,7 +1272,14 @@ private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo
 
   private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr,
       TypeInfo returnType) throws HiveException {
-    String inputType = childExpr.get(0).getTypeString();
+    ExprNodeDesc child = childExpr.get(0);
+    String inputType = child.getTypeString();
+    if (child instanceof ExprNodeConstantDesc) {
+      // Return a constant vector expression
+      Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+      Double doubleValue = castConstantToDouble(constantValue, child.getTypeInfo());
+      return getConstantVectorExpression(doubleValue, returnType, Mode.PROJECTION);
+    }
     if (isIntFamily(inputType)) {
       return createVectorExpression(CastLongToDouble.class, childExpr, Mode.PROJECTION, returnType);
     } else if (inputType.equals("timestamp")) {
@@ -1231,9 +1316,16 @@ private VectorExpression getCastToBoolean(List<ExprNodeDesc> childExpr)
     return null;
   }
 
-  private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr)
+  private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr, TypeInfo returnType)
       throws HiveException {
-    String inputType = childExpr.get(0).getTypeString();
+    ExprNodeDesc child = childExpr.get(0);
+    String inputType = child.getTypeString();
+    if (child instanceof ExprNodeConstantDesc) {
+      // Return a constant vector expression
+      Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+      Long longValue = castConstantToLong(constantValue, child.getTypeInfo());
+      return getConstantVectorExpression(longValue, returnType, Mode.PROJECTION);
+    }
     // Float family, timestamp are handled via descriptor based lookup, int family needs
     // special handling.
     if (isIntFamily(inputType)) {
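The VectorizationContext change is a small constant-folding pass for casts: when the operand of a CAST is a literal (an ExprNodeConstantDesc), castConstantToLong/castConstantToDouble evaluate the cast once at plan-compilation time, and getConstantVectorExpression emits a constant projection, so no per-row cast runs during execution. A minimal standalone sketch of the idea follows (plain Java with hypothetical names, not Hive's API; the real patch switches on PrimitiveCategory rather than java.lang.Number, in part because values such as HiveDecimal are not Number subclasses):

    // Sketch: fold CAST(<numeric literal> AS BIGINT/DOUBLE) at compile time.
    public final class CastFoldingSketch {

      static long foldCastToLong(Object literal) {
        if (literal instanceof Number) {
          // Truncates float/double, widens byte/short/int -- the same outcome
          // as the explicit (long) casts in castConstantToLong above.
          return ((Number) literal).longValue();
        }
        throw new IllegalArgumentException("unsupported literal type: " + literal.getClass());
      }

      static double foldCastToDouble(Object literal) {
        if (literal instanceof Number) {
          return ((Number) literal).doubleValue();
        }
        throw new IllegalArgumentException("unsupported literal type: " + literal.getClass());
      }

      public static void main(String[] args) {
        System.out.println(foldCastToLong(50));    // 50   -- CAST(50 AS INT) in the test below
        System.out.println(foldCastToDouble(50));  // 50.0 -- CAST(50 AS DOUBLE)
        System.out.println(foldCastToLong(3.9d));  // 3    -- truncation, as in (long) doubleVal
      }
    }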
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
index eeb76d7..25f1abd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
@@ -323,12 +323,12 @@ public Object writeValue(ColumnVector column, int row) throws HiveException {
           return writeValue(dcv.vector[0]);
         } else if (!dcv.noNulls && !dcv.isRepeating && !dcv.isNull[row]) {
           return writeValue(dcv.vector[row]);
+        } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) {
+          return null;
         } else if (!dcv.noNulls && dcv.isRepeating && !dcv.isNull[0]) {
           return writeValue(dcv.vector[0]);
         } else if (!dcv.noNulls && dcv.isRepeating && dcv.isNull[0]) {
           return null;
-        } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) {
-          return null;
         }
         throw new HiveException(
           String.format(
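The VectorExpressionWriterFactory hunk reorders two branches so the non-repeating null case sits next to the other non-repeating cases; the four !noNulls branches are mutually exclusive, so behavior is unchanged. The ladder is easier to audit once you notice it collapses to "normalize the row index, then check the null mask." A standalone reading aid (plain Java; the field names follow the ColumnVector conventions visible in the hunk, everything else is simplified):

    // Equivalent logic to the branch ladder above, flattened.
    final class ColumnVectorSketch {
      boolean noNulls;     // when true, isNull[] can be ignored entirely
      boolean isRepeating; // when true, slot 0 holds the value for every row
      boolean[] isNull;
      long[] vector;

      Long valueAt(int row) {
        int i = isRepeating ? 0 : row; // repeating vectors only populate slot 0
        if (!noNulls && isNull[i]) {
          return null;                 // covers repeating and non-repeating nulls alike
        }
        return vector[i];
      }
    }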
diff --git ql/src/test/queries/clientpositive/vector_cast_constant.q ql/src/test/queries/clientpositive/vector_cast_constant.q
new file mode 100644
index 0000000..109f86b
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_cast_constant.q
@@ -0,0 +1,48 @@
+SET hive.vectorized.execution.enabled=true;
+
+DROP TABLE over1k;
+DROP TABLE over1korc;
+
+-- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k;
+
+CREATE TABLE over1korc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC;
+
+INSERT INTO TABLE over1korc SELECT * FROM over1k;
+
+EXPLAIN SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10;
+
+SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10;
diff --git ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out
new file mode 100644
index 0000000..d10f847
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out
@@ -0,0 +1,197 @@
+PREHOOK: query: DROP TABLE over1k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE over1korc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1korc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over1k
+PREHOOK: query: CREATE TABLE over1korc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE over1korc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1korc
+PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k
+PREHOOK: Output: default@over1korc
+POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k
+POSTHOOK: Output: default@over1korc
+POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
+PREHOOK: query: EXPLAIN SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: over1korc
+                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: i (type: int)
+                    outputColumnNames: i
+                    Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(50), avg(UDFToDouble(50))
+                      keys: i (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: struct<count:bigint,sum:double,input:double>)
+            Execution mode: vectorized
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), avg(VALUE._col1)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+                  Limit
+                    Number of rows: 10
+                    Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1korc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1korc
+#### A masked pattern was here ####
+65536	50.0	50.0
+65537	50.0	50.0
+65538	50.0	50.0
+65539	50.0	50.0
+65540	50.0	50.0
+65541	50.0	50.0
+65542	50.0	50.0
+65543	50.0	50.0
+65544	50.0	50.0
+65545	50.0	50.0
diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out
new file mode 100644
index 0000000..18f7fe6
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_cast_constant.q.out
@@ -0,0 +1,191 @@
+PREHOOK: query: DROP TABLE over1k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE over1korc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1korc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over1k
+PREHOOK: query: CREATE TABLE over1korc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE over1korc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1korc
+PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k
+PREHOOK: Output: default@over1korc
+POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k
+POSTHOOK: Output: default@over1korc
+POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
+PREHOOK: query: EXPLAIN SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over1korc
+            Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: i (type: int)
+              outputColumnNames: i
+              Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: avg(50), avg(UDFToDouble(50))
+                keys: i (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: struct<count:bigint,sum:double,input:double>)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0), avg(VALUE._col1)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+            Limit
+              Number of rows: 10
+              Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1korc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+   i,
+   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`
+   FROM over1korc GROUP BY i LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1korc
+#### A masked pattern was here ####
+65536	50.0	50.0
+65537	50.0	50.0
+65538	50.0	50.0
+65539	50.0	50.0
+65540	50.0	50.0
+65541	50.0	50.0
+65542	50.0	50.0
+65543	50.0	50.0
+65544	50.0	50.0
+65545	50.0	50.0
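Both result files show 50.0 for every group, as expected: after folding, each aggregate's input is a constant column rather than a per-row cast, and the two plans differ only in engine scaffolding (Tez vertices versus a Map Reduce stage). In Hive's vectorized batches a constant column is conventionally a repeating vector, i.e. the value sits in slot 0 with isRepeating set. A hedged standalone sketch of that representation (plain Java; it mirrors the convention, not the actual ConstantVectorExpression implementation):

    // Illustration: a folded constant presented as a column of 1024 rows.
    final class RepeatingDoubleColumnSketch {
      final double[] vector = new double[1024]; // slot 0 carries the constant
      boolean isRepeating;

      void fillConstant(double constant) {
        vector[0] = constant;
        isRepeating = true; // readers take vector[0] for every row
      }

      double get(int row) {
        return isRepeating ? vector[0] : vector[row];
      }

      public static void main(String[] args) {
        RepeatingDoubleColumnSketch col = new RepeatingDoubleColumnSketch();
        col.fillConstant(50.0);            // CAST(50 AS DOUBLE), folded once
        System.out.println(col.get(0));    // 50.0
        System.out.println(col.get(1023)); // 50.0 -- no cast executed per row
      }
    }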