diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out index 4ed53e5..663a572 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out @@ -71,6 +71,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns id @@ -80,6 +81,8 @@ STAGE PLANS: location ### test.blobstore.path ###/table1 name default.table1 numFiles 2 + numRows 2 + rawDataSize 2 serialization.ddl struct table1 { i32 id} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -142,6 +145,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns id @@ -151,6 +155,8 @@ STAGE PLANS: location ### test.blobstore.path ###/table1 name default.table1 numFiles 2 + numRows 2 + rawDataSize 2 serialization.ddl struct table1 { i32 id} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -177,6 +183,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns id @@ -186,6 +193,8 @@ STAGE PLANS: location ### test.blobstore.path ###/table1 name default.table1 numFiles 2 + numRows 2 + rawDataSize 2 serialization.ddl struct table1 { i32 id} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ 
-205,6 +214,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns id @@ -214,6 +224,8 @@ STAGE PLANS: location ### test.blobstore.path ###/table1 name default.table1 numFiles 2 + numRows 2 + rawDataSize 2 serialization.ddl struct table1 { i32 id} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -224,6 +236,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns id @@ -233,6 +246,8 @@ STAGE PLANS: location ### test.blobstore.path ###/table1 name default.table1 numFiles 2 + numRows 2 + rawDataSize 2 serialization.ddl struct table1 { i32 id} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -253,6 +268,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns id @@ -262,6 +278,8 @@ STAGE PLANS: location ### test.blobstore.path ###/table1 name default.table1 numFiles 2 + numRows 2 + rawDataSize 2 serialization.ddl struct table1 { i32 id} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -284,6 +302,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns id @@ -293,6 +312,8 @@ STAGE PLANS: location ### test.blobstore.path ###/table1 name default.table1 
numFiles 2 + numRows 2 + rawDataSize 2 serialization.ddl struct table1 { i32 id} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -312,6 +333,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns id @@ -321,6 +343,8 @@ STAGE PLANS: location ### test.blobstore.path ###/table1 name default.table1 numFiles 2 + numRows 2 + rawDataSize 2 serialization.ddl struct table1 { i32 id} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -331,6 +355,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns id @@ -340,6 +365,8 @@ STAGE PLANS: location ### test.blobstore.path ###/table1 name default.table1 numFiles 2 + numRows 2 + rawDataSize 2 serialization.ddl struct table1 { i32 id} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out index 8a90a9e..46bfef5 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out @@ -192,6 +192,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns key @@ -200,9 +201,13 @@ STAGE PLANS: #### A masked pattern 
was here #### location ### test.blobstore.path ###/write_final_output_blobstore name default.blobstore_table + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct blobstore_table { i32 key} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.blobstore_table @@ -219,6 +224,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns key @@ -227,9 +233,13 @@ STAGE PLANS: #### A masked pattern was here #### location ### test.blobstore.path ###/write_final_output_blobstore name default.blobstore_table + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct blobstore_table { i32 key} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.blobstore_table @@ -406,6 +416,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns key @@ -414,9 +425,13 @@ STAGE PLANS: #### A masked pattern was here #### location ### test.blobstore.path ###/write_final_output_blobstore name default.blobstore_table + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct blobstore_table { i32 key} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.blobstore_table @@ -433,6 +448,7 @@ STAGE PLANS: input 
format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , columns key @@ -441,9 +457,13 @@ STAGE PLANS: #### A masked pattern was here #### location ### test.blobstore.path ###/write_final_output_blobstore name default.blobstore_table + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct blobstore_table { i32 key} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.blobstore_table diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 3122689..a53f774 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.metadata; +import java.io.FileNotFoundException; +import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; @@ -29,6 +31,7 @@ import java.util.Properties; import java.util.Set; +import com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -946,6 +949,29 @@ public static String getCompleteName(String dbName, String tabName) { } } + /** + * Reports whether the table location holds no visible entries. + * + * @return true when the location does not exist, or when listing it through + * FileUtils.HIDDEN_FILES_PATH_FILTER yields nothing + * @throws HiveException wrapping any other IOException raised while resolving or querying the file system + */ + public boolean isEmpty() throws HiveException { + // Resolve the location once so the null check and every file-system call use the same Path. + Path location = getPath(); + Preconditions.checkNotNull(location); + try { + FileSystem fs = FileSystem.get(location.toUri(), SessionState.getSessionConf()); + return !fs.exists(location) + || fs.listStatus(location, FileUtils.HIDDEN_FILES_PATH_FILTER).length == 0; + } catch (FileNotFoundException e) { + // The location disappeared between exists() and listStatus(); same answer as "does not exist". + return true; + } catch (IOException e) { + throw new HiveException(e); + } + } + public boolean isTemporary() { return tTable.isTemporary(); } diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java index 4f614a8..d971c73 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java @@ -834,7 +834,10 @@ public Table toTable(HiveConf conf) throws HiveException { } } } - if (getLocation() == null && !this.isCTAS) { + + // Test isExternal() before isEmpty(): isEmpty() resolves and probes the location's FileSystem, + // which external tables never need here, and which throws for a location that cannot be resolved. + if (!this.isCTAS && (tbl.getPath() == null || (!isExternal() && tbl.isEmpty()))) { if (!tbl.isPartitioned() && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(), StatsSetupConst.TRUE); diff --git 
a/ql/src/test/results/clientpositive/default_file_format.q.out b/ql/src/test/results/clientpositive/default_file_format.q.out index ef0ca52..55401c0 100644 --- a/ql/src/test/results/clientpositive/default_file_format.q.out +++ b/ql/src/test/results/clientpositive/default_file_format.q.out @@ -201,6 +201,11 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 #### A masked pattern was here #### # Storage Information @@ -498,6 +503,11 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 #### A masked pattern was here #### # Storage Information diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out index 4382522..1bae859 100644 --- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -48,7 +48,10 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} numFiles 1 + numRows 2 + rawDataSize 634 totalSize 578 #### A masked pattern was here #### @@ -72,6 +75,7 @@ POSTHOOK: Input: default@testdeci2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment amount decimal(10,3) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 @@ -112,23 +116,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: item is not null (type: boolean) - Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), item (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) TableScan alias: d - Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((id = 2) and item is not null) (type: boolean) Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE @@ -150,14 +154,14 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col3 (type: string) outputColumnNames: _col0, _col3, _col4 - Statistics: Num rows: 5 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), COALESCE(_col3,0) (type: decimal(13,3)), COALESCE(_col4,0) (type: decimal(13,3)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1140 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out index 98ba6af..5db87d9 100644 --- a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out @@ -48,7 +48,10 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} numFiles 1 + numRows 2 + rawDataSize 634 totalSize 578 #### A masked pattern was here #### @@ -72,6 +75,7 @@ POSTHOOK: Input: default@testdeci2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment amount decimal(10,3) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 @@ -117,18 +121,18 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=5 width=228) + Select Operator [SEL_9] (rows=2 width=228) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_15] (rows=5 width=228) + Merge Join Operator [MERGEJOIN_15] (rows=2 width=228) Conds:RS_6._col1=RS_7._col3(Inner),Output:["_col0","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col1 - Select Operator [SEL_2] (rows=5 width=88) + Select Operator [SEL_2] (rows=2 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=5 width=88) + Filter Operator [FIL_13] (rows=2 width=88) predicate:item is not null - TableScan [TS_0] (rows=5 width=88) + TableScan [TS_0] (rows=2 width=88) default@testdeci2,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","item"] 
<-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_7] @@ -137,6 +141,6 @@ Stage-0 Output:["_col1","_col2","_col3"] Filter Operator [FIL_14] (rows=1 width=312) predicate:((id = 2) and item is not null) - TableScan [TS_3] (rows=1 width=312) + TableScan [TS_3] (rows=2 width=312) default@testdeci2,d,Tbl:COMPLETE,Col:COMPLETE,Output:["id","amount","sales_tax","item"] diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out index 4660c8b..551ad5f 100644 --- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -292,6 +292,7 @@ POSTHOOK: Input: default@empty_tab # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment a int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: explain analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY @@ -360,6 +361,7 @@ POSTHOOK: Input: default@empty_tab # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment a int 0 0 0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: desc formatted empty_tab b PREHOOK: type: DESCTABLE PREHOOK: Input: default@empty_tab @@ -369,6 +371,7 @@ POSTHOOK: Input: default@empty_tab # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment b double 0.0 0.0 0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: CREATE DATABASE test PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:test