diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index e46e6ce..05e0911 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -70,6 +70,7 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\
   smb_mapjoin_8.q
 
 minitez.query.files.shared=acid_globallimit.q,\
+  deleteAnalyze.q,\
   empty_join.q,\
   alter_merge_2_orc.q,\
   alter_merge_orc.q,\
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
index e0b449b..9dce801 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
@@ -738,6 +738,7 @@ static StorageDescriptor deserializeStorageDescriptor(byte[] serialized)
   static List<String> getPartitionKeyTypes(List<FieldSchema> parts) {
     com.google.common.base.Function<FieldSchema, String> fieldSchemaToType =
         new com.google.common.base.Function<FieldSchema, String>() {
+      @Override
       public String apply(FieldSchema fs) { return fs.getType(); }
     };
     return Lists.transform(parts, fieldSchemaToType);
@@ -746,6 +747,7 @@ static StorageDescriptor deserializeStorageDescriptor(byte[] serialized)
   static List<String> getPartitionNames(List<FieldSchema> parts) {
     com.google.common.base.Function<FieldSchema, String> fieldSchemaToName =
         new com.google.common.base.Function<FieldSchema, String>() {
+      @Override
       public String apply(FieldSchema fs) { return fs.getName(); }
     };
     return Lists.transform(parts, fieldSchemaToName);
@@ -1205,14 +1207,19 @@ static StorageDescriptorParts deserializeTable(String dbName, String tableName,
       if (decimalData.isSetBitVectors()) {
         builder.setBitVectors(decimalData.getBitVectors());
       }
+      byte[] empty = new byte[1];
       builder.setDecimalStats(
           HbaseMetastoreProto.ColumnStats.DecimalStats
               .newBuilder()
-              .setLowValue(
+              .setLowValue(decimalData.getLowValue() == null?
+                  HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
+                      .setUnscaled(ByteString.copyFrom(empty)).setScale(0).build() :
                   HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
                       .setUnscaled(ByteString.copyFrom(decimalData.getLowValue().getUnscaled()))
                       .setScale(decimalData.getLowValue().getScale()).build())
-              .setHighValue(
+              .setHighValue(decimalData.getHighValue() == null?
+                  HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
+                      .setUnscaled(ByteString.copyFrom(empty)).setScale(0).build() :
                   HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
                       .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled()))
                       .setScale(decimalData.getHighValue().getScale()).build())).build();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
index f330564..636f079 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
@@ -412,7 +412,7 @@ private void completeInitialization(Collection<Future<?>> fs) throws HiveExcepti
   }
 
   /**
-   * This metod can be used to retrieve the results from async operations
+   * This method can be used to retrieve the results from async operations
    * started at init time - before the operator pipeline is started.
    *
    * @param os
diff --git ql/src/test/queries/clientpositive/deleteAnalyze.q ql/src/test/queries/clientpositive/deleteAnalyze.q
new file mode 100644
index 0000000..e83151c
--- /dev/null
+++ ql/src/test/queries/clientpositive/deleteAnalyze.q
@@ -0,0 +1,29 @@
+set hive.stats.autogather=true;
+
+create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+stored as orc;
+
+insert into table testdeci2 values(1,123.123,1234.123,'desk1'),(2,123.123,1234.123,'desk2');
+
+describe formatted testdeci2;
+
+dfs -rmr /build/ql/test/data/warehouse/testdeci2/000000_0;
+
+describe formatted testdeci2 amount;
+
+analyze table testdeci2 compute statistics for columns;
+
+set hive.stats.fetch.column.stats=true;
+
+analyze table testdeci2 compute statistics for columns;
+
+explain
+select s.id,
+coalesce(d.amount,0) as sales,
+coalesce(d.sales_tax,0) as tax
+from testdeci2 s join testdeci2 d
+on s.item=d.item and d.id=2;
diff --git ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out
new file mode 100644
index 0000000..a536f43
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out
@@ -0,0 +1,141 @@
+PREHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testdeci2
+PREHOOK: query: insert into table testdeci2 values(1,123.123,1234.123,'desk1'),(2,123.123,1234.123,'desk2')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: insert into table testdeci2 values(1,123.123,1234.123,'desk1'),(2,123.123,1234.123,'desk2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@testdeci2
+POSTHOOK: Lineage: testdeci2.amount EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: testdeci2.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: testdeci2.item SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: testdeci2.sales_tax EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: describe formatted testdeci2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@testdeci2
+POSTHOOK: query: describe formatted testdeci2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@testdeci2
+# col_name            	data_type           	comment
+
+id                  	int
+amount              	decimal(10,3)
+sales_tax           	decimal(10,3)
+item                	string
+
+# Detailed Table Information
+Database:           	default
+#### A masked pattern was here ####
+Retention:          	0
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	1
+	numRows             	2
+	rawDataSize         	634
+	totalSize           	551
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:      	org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:        	org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:       	org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:         	No
+Num Buckets:        	-1
+Bucket Columns:     	[]
+Sort Columns:       	[]
+Storage Desc Params:
+	serialization.format	1
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted testdeci2 amount
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@testdeci2
+POSTHOOK: query: describe formatted testdeci2 amount
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@testdeci2
+# col_name            	data_type           	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+amount              	decimal(10,3)       	from deserializer
+PREHOOK: query: analyze table testdeci2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testdeci2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+PREHOOK: query: analyze table testdeci2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testdeci2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select s.id,
+coalesce(d.amount,0) as sales,
+coalesce(d.sales_tax,0) as tax
+from testdeci2 s join testdeci2 d
+on s.item=d.item and d.id=2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select s.id,
+coalesce(d.amount,0) as sales,
+coalesce(d.sales_tax,0) as tax
+from testdeci2 s join testdeci2 d
+on s.item=d.item and d.id=2
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2
+      File Output Operator [FS_10]
+        Select Operator [SEL_9] (rows=9223372036854775807 width=1)
+          Output:["_col0","_col1","_col2"]
+          Merge Join Operator [MERGEJOIN_15] (rows=9223372036854775807 width=1)
+            Conds:RS_6._col1=RS_7._col3(Inner),Output:["_col0","_col3","_col4"]
+          <-Map 1 [SIMPLE_EDGE]
+            SHUFFLE [RS_6]
+              PartitionCols:_col1
+              Select Operator [SEL_2] (rows=2 width=88)
+                Output:["_col0","_col1"]
+                Filter Operator [FIL_13] (rows=2 width=88)
+                  predicate:item is not null
+                  TableScan [TS_0] (rows=2 width=88)
+                    default@testdeci2,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","item"]
+          <-Map 3 [SIMPLE_EDGE]
+            SHUFFLE [RS_7]
+              PartitionCols:_col3
+              Select Operator [SEL_5] (rows=1 width=312)
+                Output:["_col1","_col2","_col3"]
+                Filter Operator [FIL_14] (rows=1 width=312)
+                  predicate:((id = 2) and item is not null)
+                  TableScan [TS_3] (rows=2 width=312)
+                    default@testdeci2,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","amount","sales_tax","item"]
+
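Note on the HBaseUtils.java hunk above: the Thrift decimal column stats (decimalData in the hunk) can carry a null low/high value — the new deleteAnalyze.q test provokes this by deleting the table's data file and re-running ANALYZE — and the protobuf builder would otherwise hit a NullPointerException on decimalData.getLowValue().getUnscaled(). Below is a minimal standalone sketch of the same guard, assuming only the types visible in the patch; the helper name toProtoDecimal is illustrative and does not exist in the codebase:

    // Hypothetical helper (not part of the patch) restating its null guard:
    // a missing Thrift decimal is serialized as a placeholder protobuf Decimal
    // carrying a single zero byte with scale 0, rather than dereferencing null.
    static HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal toProtoDecimal(Decimal d) {
      if (d == null) {
        return HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
            .setUnscaled(ByteString.copyFrom(new byte[1])) // one zero byte
            .setScale(0)
            .build();
      }
      return HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
          .setUnscaled(ByteString.copyFrom(d.getUnscaled()))
          .setScale(d.getScale())
          .build();
    }

With such a helper, the two ternaries added in the patched method would collapse to setLowValue(toProtoDecimal(decimalData.getLowValue())) and setHighValue(toProtoDecimal(decimalData.getHighValue())).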