diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 41d150c..71e49fc 100644 --- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -137,30 +137,31 @@ public String getAggregator(Configuration conf) { public static final String FALSE = "false"; + // if it is empty, we treat it as accurate. public static boolean areBasicStatsUptoDate(Map params) { String statsAcc = params.get(COLUMN_STATS_ACCURATE); if (statsAcc == null) { - return false; + return true; } else { JSONObject jsonObj; try { jsonObj = new JSONObject(statsAcc); if (jsonObj != null && jsonObj.has(BASIC_STATS)) { - return true; + return jsonObj.get(BASIC_STATS).equals(TRUE) ? true : false; } else { - return false; + return true; } } catch (JSONException e) { // For backward compatibility, if previous value can not be parsed to a // json object, it will come here. LOG.debug("In StatsSetupConst, JsonParser can not parse " + BASIC_STATS + "."); // We try to parse it as TRUE or FALSE - if (statsAcc.equals(TRUE)) { - setBasicStatsState(params, TRUE); - return true; - } else { + if (statsAcc.equals(FALSE)) { setBasicStatsState(params, FALSE); return false; + } else { + setBasicStatsState(params, TRUE); + return true; } } } @@ -193,14 +194,26 @@ public static boolean areColumnStatsUptoDate(Map params, String } } + public static void clearStatsState(Map params) { + if (params.containsKey(COLUMN_STATS_ACCURATE)) { + params.remove(COLUMN_STATS_ACCURATE); + } + } + // It will only throw JSONException when stats.put(BASIC_STATS, TRUE) // has duplicate key, which is not possible // note that set basic stats false will wipe out column stats too. public static void setBasicStatsState(Map params, String setting) { if (setting.equals(FALSE)) { - if (params != null && params.containsKey(COLUMN_STATS_ACCURATE)) { - params.remove(COLUMN_STATS_ACCURATE); + JSONObject stats = new JSONObject(new LinkedHashMap()); + // duplicate key is not possible + try { + stats.put(BASIC_STATS, FALSE); + } catch (JSONException e) { + // impossible to throw any json exceptions. + LOG.trace(e.getMessage()); } + params.put(COLUMN_STATS_ACCURATE, stats.toString()); } else { String statsAcc = params.get(COLUMN_STATS_ACCURATE); if (statsAcc == null) { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 866e1c3..2a4d0ac 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -238,7 +238,11 @@ public static boolean updateTableStatsFast(Table tbl, FileStatus[] fileStatus, b StatsSetupConst.STATS_GENERATED))) { StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); } else { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); + // if basicStats is true/empty, clear it to empty + // else (it is false), do not clear but keep it. + if (StatsSetupConst.areBasicStatsUptoDate(params)) { + StatsSetupConst.clearStatsState(params); + } } } tbl.setParameters(params); @@ -360,7 +364,9 @@ public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionItera StatsSetupConst.STATS_GENERATED))) { StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); } else { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); + if (StatsSetupConst.areBasicStatsUptoDate(params)) { + StatsSetupConst.clearStatsState(params); + } } } part.setParameters(params); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java index 87a7667..5ea54ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java @@ -163,6 +163,9 @@ private int aggregateStats(Hive db) { if (partitions == null) { org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable(); Map parameters = tTable.getParameters(); + if (work.getLoadTableDesc() != null && work.getLoadTableDesc().isFromLoadSemanticAnalyzer()) { + StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); + } // non-partitioned tables: if (!existStats(parameters) && atomic) { return 0; @@ -203,6 +206,10 @@ private int aggregateStats(Hive db) { // org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition(); Map parameters = tPart.getParameters(); + if (work.getLoadTableDesc() != null + && work.getLoadTableDesc().isFromLoadSemanticAnalyzer()) { + StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); + } if (!existStats(parameters) && atomic) { continue; } @@ -316,22 +323,25 @@ private void updateStats(StatsAggregator statsAggregator, String aggKey = prefix.endsWith(Path.SEPARATOR) ? prefix : prefix + Path.SEPARATOR; - for (String statType : StatsSetupConst.statsRequireCompute) { - String value = statsAggregator.aggregateStats(aggKey, statType); - if (value != null && !value.isEmpty()) { - long longValue = Long.parseLong(value); - - if (work.getLoadTableDesc() != null && - !work.getLoadTableDesc().getReplace()) { - String originalValue = parameters.get(statType); - if (originalValue != null) { - longValue += Long.parseLong(originalValue); // todo: invalid + valid = invalid + if (StatsSetupConst.areBasicStatsUptoDate(parameters)) { + for (String statType : StatsSetupConst.statsRequireCompute) { + String value = statsAggregator.aggregateStats(aggKey, statType); + if (value != null && !value.isEmpty()) { + long longValue = Long.parseLong(value); + + if (work.getLoadTableDesc() != null && !work.getLoadTableDesc().getReplace()) { + String originalValue = parameters.get(statType); + if (originalValue != null) { + longValue += Long.parseLong(originalValue); // todo: invalid + + // valid + // = invalid + } + } + parameters.put(statType, String.valueOf(longValue)); + } else { + if (atomic) { + throw new HiveException(ErrorMsg.STATSAGGREGATOR_MISSED_SOMESTATS, statType); } - } - parameters.put(statType, String.valueOf(longValue)); - } else { - if (atomic) { - throw new HiveException(ErrorMsg.STATSAGGREGATOR_MISSED_SOMESTATS, statType); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index b90616f..d6f6eb4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -261,7 +261,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { LoadTableDesc loadTableWork; loadTableWork = new LoadTableDesc(new Path(fromURI), - Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite); + Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite, true); if (preservePartitionSpecs){ // Note : preservePartitionSpecs=true implies inheritTableSpecs=false but // but preservePartitionSpecs=false(default) here is not sufficient enough diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java index 771a919..372188f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java @@ -45,6 +45,7 @@ // TODO: the below seems like they should just be combined into partitionDesc private org.apache.hadoop.hive.ql.plan.TableDesc table; private Map partitionSpec; // NOTE: this partitionSpec has to be ordered map + private boolean isFromLoadSemanticAnalyzer; public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, @@ -69,6 +70,12 @@ public LoadTableDesc(final Path sourcePath, this(sourcePath, table, partitionSpec, replace, AcidUtils.Operation.NOT_ACID); } + public LoadTableDesc(final Path sourcePath, final TableDesc table, + final Map partitionSpec, final boolean replace, boolean isFromLoadSemanticAnalyzer) { + this(sourcePath, table, partitionSpec, replace, AcidUtils.Operation.NOT_ACID); + this.isFromLoadSemanticAnalyzer = isFromLoadSemanticAnalyzer; + } + public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec, @@ -172,4 +179,8 @@ public void setLbCtx(ListBucketingCtx lbCtx) { public AcidUtils.Operation getWriteType() { return writeType; } + + public boolean isFromLoadSemanticAnalyzer() { + return isFromLoadSemanticAnalyzer; + } } diff --git a/ql/src/test/queries/clientpositive/insert_values_orig_table_use_metadata.q b/ql/src/test/queries/clientpositive/insert_values_orig_table_use_metadata.q new file mode 100644 index 0000000..0b09d23 --- /dev/null +++ b/ql/src/test/queries/clientpositive/insert_values_orig_table_use_metadata.q @@ -0,0 +1,94 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.compute.query.using.stats=true; + +create table acid_ivot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc TBLPROPERTIES ('transactional'='true'); + +desc formatted acid_ivot; + +LOAD DATA LOCAL INPATH "../../data/files/alltypesorc" into table acid_ivot; + +desc formatted acid_ivot; + +explain select count(*) from acid_ivot; + +select count(*) from acid_ivot; + +insert into table acid_ivot values + (1, 2, 3, 4, 3.14, 2.34, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true), + (111, 222, 3333, 444, 13.14, 10239302.34239320, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true); + +desc formatted acid_ivot; + +explain select count(*) from acid_ivot; + +select count(*) from acid_ivot; + +drop table acid_ivot; + +create table acid_ivot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc TBLPROPERTIES ('transactional'='true'); + +insert into table acid_ivot values + (1, 2, 3, 4, 3.14, 2.34, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true), + (111, 222, 3333, 444, 13.14, 10239302.34239320, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true); + +desc formatted acid_ivot; + +explain select count(*) from acid_ivot; + +select count(*) from acid_ivot; + +insert into table acid_ivot values + (1, 2, 3, 4, 3.14, 2.34, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true), + (111, 222, 3333, 444, 13.14, 10239302.34239320, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true); + +desc formatted acid_ivot; + +explain select count(*) from acid_ivot; + +select count(*) from acid_ivot; + +LOAD DATA LOCAL INPATH "../../data/files/alltypesorc" into table acid_ivot; + +desc formatted acid_ivot; + +explain select count(*) from acid_ivot; + +drop table acid_ivot; + +create table acid_ivot like src; + +desc formatted acid_ivot; + +insert overwrite table acid_ivot select * from src; + +desc formatted acid_ivot; + +explain select count(*) from acid_ivot; + +select count(*) from acid_ivot; diff --git a/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out b/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out new file mode 100644 index 0000000..5747938 --- /dev/null +++ b/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out @@ -0,0 +1,741 @@ +PREHOOK: query: create table acid_ivot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: create table acid_ivot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_ivot +PREHOOK: query: desc formatted acid_ivot +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_ivot +POSTHOOK: query: desc formatted acid_ivot +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_ivot +# col_name data_type comment + +ctinyint tinyint +csmallint smallint +cint int +cbigint bigint +cfloat float +cdouble double +cstring1 string +cstring2 string +ctimestamp1 timestamp +ctimestamp2 timestamp +cboolean1 boolean +cboolean2 boolean + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + transactional true +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [cint] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/alltypesorc" into table acid_ivot +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/alltypesorc" into table acid_ivot +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@acid_ivot +PREHOOK: query: desc formatted acid_ivot +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_ivot +POSTHOOK: query: desc formatted acid_ivot +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_ivot +# col_name data_type comment + +ctinyint tinyint +csmallint smallint +cint int +cbigint bigint +cfloat float +cdouble double +cstring1 string +cstring2 string +ctimestamp1 timestamp +ctimestamp2 timestamp +cboolean1 boolean +cboolean2 boolean + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"false\"} + numFiles 1 + totalSize 377237 + transactional true +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [cint] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select count(*) from acid_ivot +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from acid_ivot +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: acid_ivot + Statistics: Num rows: 1 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid_ivot +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid_ivot +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +12288 +PREHOOK: query: insert into table acid_ivot values + (1, 2, 3, 4, 3.14, 2.34, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true), + (111, 222, 3333, 444, 13.14, 10239302.34239320, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: insert into table acid_ivot values + (1, 2, 3, 4, 3.14, 2.34, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true), + (111, 222, 3333, 444, 13.14, 10239302.34239320, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@acid_ivot +POSTHOOK: Lineage: acid_ivot.cbigint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cboolean1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col11, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cboolean2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col12, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cdouble EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cfloat EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.csmallint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cstring1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cstring2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.ctimestamp1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.ctimestamp2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col10, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.ctinyint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted acid_ivot +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_ivot +POSTHOOK: query: desc formatted acid_ivot +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_ivot +# col_name data_type comment + +ctinyint tinyint +csmallint smallint +cint int +cbigint bigint +cfloat float +cdouble double +cstring1 string +cstring2 string +ctimestamp1 timestamp +ctimestamp2 timestamp +cboolean1 boolean +cboolean2 boolean + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"false\"} + numFiles 2 + totalSize 378741 + transactional true +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [cint] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select count(*) from acid_ivot +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from acid_ivot +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: acid_ivot + Statistics: Num rows: 1 Data size: 378741 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 378741 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid_ivot +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid_ivot +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +12290 +PREHOOK: query: drop table acid_ivot +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@acid_ivot +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: drop table acid_ivot +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@acid_ivot +POSTHOOK: Output: default@acid_ivot +PREHOOK: query: create table acid_ivot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: create table acid_ivot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_ivot +PREHOOK: query: insert into table acid_ivot values + (1, 2, 3, 4, 3.14, 2.34, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true), + (111, 222, 3333, 444, 13.14, 10239302.34239320, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: insert into table acid_ivot values + (1, 2, 3, 4, 3.14, 2.34, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true), + (111, 222, 3333, 444, 13.14, 10239302.34239320, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@acid_ivot +POSTHOOK: Lineage: acid_ivot.cbigint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cboolean1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col11, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cboolean2 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col12, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cdouble EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cfloat EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.csmallint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cstring1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cstring2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.ctimestamp1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.ctimestamp2 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col10, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.ctinyint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted acid_ivot +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_ivot +POSTHOOK: query: desc formatted acid_ivot +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_ivot +# col_name data_type comment + +ctinyint tinyint +csmallint smallint +cint int +cbigint bigint +cfloat float +cdouble double +cstring1 string +cstring2 string +ctimestamp1 timestamp +ctimestamp2 timestamp +cboolean1 boolean +cboolean2 boolean + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2 + rawDataSize 0 + totalSize 1508 + transactional true +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [cint] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select count(*) from acid_ivot +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from acid_ivot +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid_ivot +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid_ivot +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +2 +PREHOOK: query: insert into table acid_ivot values + (1, 2, 3, 4, 3.14, 2.34, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true), + (111, 222, 3333, 444, 13.14, 10239302.34239320, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: insert into table acid_ivot values + (1, 2, 3, 4, 3.14, 2.34, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true), + (111, 222, 3333, 444, 13.14, 10239302.34239320, 'fred', 'bob', '2014-09-01 10:34:23.111', '1944-06-06 06:00:00', true, true) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@acid_ivot +POSTHOOK: Lineage: acid_ivot.cbigint EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cboolean1 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col11, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cboolean2 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col12, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cdouble EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cfloat EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cint EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.csmallint EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cstring1 SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.cstring2 SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.ctimestamp1 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.ctimestamp2 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col10, type:string, comment:), ] +POSTHOOK: Lineage: acid_ivot.ctinyint EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted acid_ivot +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_ivot +POSTHOOK: query: desc formatted acid_ivot +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_ivot +# col_name data_type comment + +ctinyint tinyint +csmallint smallint +cint int +cbigint bigint +cfloat float +cdouble double +cstring1 string +cstring2 string +ctimestamp1 timestamp +ctimestamp2 timestamp +cboolean1 boolean +cboolean2 boolean + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 4 + rawDataSize 0 + totalSize 3016 + transactional true +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [cint] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select count(*) from acid_ivot +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from acid_ivot +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid_ivot +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid_ivot +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +4 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/alltypesorc" into table acid_ivot +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/alltypesorc" into table acid_ivot +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@acid_ivot +PREHOOK: query: desc formatted acid_ivot +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_ivot +POSTHOOK: query: desc formatted acid_ivot +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_ivot +# col_name data_type comment + +ctinyint tinyint +csmallint smallint +cint int +cbigint bigint +cfloat float +cdouble double +cstring1 string +cstring2 string +ctimestamp1 timestamp +ctimestamp2 timestamp +cboolean1 boolean +cboolean2 boolean + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"false\"} + numFiles 3 + totalSize 380253 + transactional true +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [cint] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select count(*) from acid_ivot +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from acid_ivot +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: acid_ivot + Statistics: Num rows: 1 Data size: 380253 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 380253 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table acid_ivot +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@acid_ivot +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: drop table acid_ivot +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@acid_ivot +POSTHOOK: Output: default@acid_ivot +PREHOOK: query: create table acid_ivot like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: create table acid_ivot like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_ivot +PREHOOK: query: desc formatted acid_ivot +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_ivot +POSTHOOK: query: desc formatted acid_ivot +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_ivot +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert overwrite table acid_ivot select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@acid_ivot +POSTHOOK: query: insert overwrite table acid_ivot select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@acid_ivot +POSTHOOK: Lineage: acid_ivot.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_ivot.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted acid_ivot +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_ivot +POSTHOOK: query: desc formatted acid_ivot +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_ivot +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select count(*) from acid_ivot +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from acid_ivot +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid_ivot +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid_ivot +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_ivot +#### A masked pattern was here #### +500