diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 73fcb03..3490d3c 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -241,6 +241,7 @@ minitez.query.files.shared=acid_globallimit.q,\ unionDistinct_1.q,\ unionDistinct_2.q,\ union_fast_stats.q,\ + union_stats.q,\ update_after_multiple_inserts.q,\ update_all_non_partitioned.q,\ update_all_partitioned.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 812a943..b0c3d3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -1230,6 +1230,7 @@ private void publishStats() throws HiveException { } } } + sContext.setIndexForTezUnion(this.conf.getIndexInTezUnion()); if (!statsPublisher.closeConnection(sContext)) { // The original exception is lost. 
// Not changing the interface to maintain backward compatibility diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index c626add..546919b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -233,7 +233,7 @@ private void releaseLocks(LoadTableDesc ltd) throws HiveException { } // we check if there is only one immediate child task and it is stats task - private boolean hasFollowingStatsTask() { + public boolean hasFollowingStatsTask() { if (this.getNumChild() == 1) { return this.getChildTasks().get(0) instanceof StatsTask; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index f555741..47d5b74 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -1494,11 +1494,6 @@ public static void addStatsTask(FileSinkOperator nd, MoveTask mvTask, statsWork.setAggKey(nd.getConf().getStatsAggPrefix()); Task statsTask = TaskFactory.get(statsWork, hconf); - // mark the MapredWork and FileSinkOperator for gathering stats - nd.getConf().setGatherStats(true); - nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); - // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); - // subscribe feeds from the MoveTask so that MoveTask can forward the list // of dynamic partition list to the StatsTask mvTask.addDependentTask(statsTask); @@ -1736,9 +1731,14 @@ public static boolean isMergeRequired(List> mvTasks, HiveConf hco // no need of merging if the move is to a local file system MoveTask mvTask = (MoveTask) GenMapRedUtils.findMoveTask(mvTasks, fsOp); - if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && - !fsOp.getConf().isMaterialization()) { - 
GenMapRedUtils.addStatsTask(fsOp, mvTask, currTask, hconf); + if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) + && !fsOp.getConf().isMaterialization()) { + // mark the MapredWork and FileSinkOperator for gathering stats + fsOp.getConf().setGatherStats(true); + fsOp.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); + if (!mvTask.hasFollowingStatsTask()) { + GenMapRedUtils.addStatsTask(fsOp, mvTask, currTask, hconf); + } } if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index 32bf24d..6715dbf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -304,7 +304,8 @@ public static void removeUnionOperators(GenTezProcContext context, BaseWork work linked = context.linkedFileSinks.get(path); linked.add(desc); - desc.setDirName(new Path(path, ""+linked.size())); + desc.setIndexInTezUnion(linked.size()); + desc.setDirName(new Path(path, "" + desc.getIndexInTezUnion())); desc.setLinkedFileSink(true); desc.setParentDir(path); desc.setLinkedFileSinkDesc(linked); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index 07ed4fd..ce0e0a8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -61,6 +61,7 @@ private DynamicPartitionCtx dpCtx; private String staticSpec; // static partition spec ends with a '/' private boolean gatherStats; + private int indexInTezUnion = -1; // Consider a query like: // insert overwrite table T3 select ... 
from T1 join T2 on T1.key = T2.key; @@ -474,4 +475,12 @@ public void setStatsTmpDir(String statsCollectionTempDir) { this.statsTmpDir = statsCollectionTempDir; } + public int getIndexInTezUnion() { + return indexInTezUnion; + } + + public void setIndexInTezUnion(int indexInTezUnion) { + this.indexInTezUnion = indexInTezUnion; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java index ae6f2ac..b58dbf3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java @@ -30,6 +30,7 @@ private final Configuration hiveConf; private Task task; private List statsTmpDirs; + private int indexForTezUnion = -1; /* -1 = no Tez union index set; matches the FileSinkDesc default and the FSStatsPublisher sentinel check */ public List getStatsTmpDirs() { return statsTmpDirs; @@ -60,4 +61,12 @@ public Task getTask() { public void setTask(Task task) { this.task = task; } + + public int getIndexForTezUnion() { + return indexForTezUnion; + } + + public void setIndexForTezUnion(int indexForTezUnion) { + this.indexForTezUnion = indexForTezUnion; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java index 3a49b30..5b4f1fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java @@ -97,7 +97,14 @@ public boolean closeConnection(StatsCollectionContext context) { assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs; Path statsDir = new Path(statsDirs.get(0)); try { - Path statsFile = new Path(statsDir,StatsSetupConst.STATS_FILE_PREFIX +conf.getInt("mapred.task.partition",0)); + Path statsFile = null; + if (context.getIndexForTezUnion() != -1) { + statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX + + conf.getInt("mapred.task.partition", 0) + "_" +
context.getIndexForTezUnion()); + } else { + statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX + + conf.getInt("mapred.task.partition", 0)); + } LOG.debug("About to create stats file for this task : " + statsFile); Output output = new Output(statsFile.getFileSystem(conf).create(statsFile,true)); LOG.debug("Created file : " + statsFile); diff --git a/ql/src/test/queries/clientpositive/union_stats.q b/ql/src/test/queries/clientpositive/union_stats.q new file mode 100644 index 0000000..789b360 --- /dev/null +++ b/ql/src/test/queries/clientpositive/union_stats.q @@ -0,0 +1,29 @@ +explain extended create table t as select * from src union all select * from src; + +create table t as select * from src union all select * from src; + +select count(1) from t; + +desc formatted t; + +create table tt as select * from t union all select * from src; + +desc formatted tt; + +drop table tt; + +create table tt as select * from src union all select * from t; + +desc formatted tt; + +create table t1 like src; +create table t2 like src; + +from (select * from src union all select * from src)s +insert overwrite table t1 select * +insert overwrite table t2 select *; + +desc formatted t1; +desc formatted t2; + +select count(1) from t1; diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out index faa3adb..5f06cda 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out @@ -49,7 +49,6 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -127,9 +126,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Stats-Aggr Operator - PREHOOK: query: insert into table partunion1 partition(part1) select temps.* from 
( select 1 as id1, '2014' as part1 from dummy diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out index 26f96b1..96c57d9 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -2902,25 +2902,10 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 - Stage-8 depends on stages: Stage-0 - Stage-11 depends on stages: Stage-0 - Stage-14 depends on stages: Stage-0 - Stage-17 depends on stages: Stage-0 - Stage-20 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 - Stage-9 depends on stages: Stage-1 - Stage-12 depends on stages: Stage-1 - Stage-15 depends on stages: Stage-1 - Stage-18 depends on stages: Stage-1 - Stage-21 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 Stage-7 depends on stages: Stage-2 - Stage-10 depends on stages: Stage-2 - Stage-13 depends on stages: Stage-2 - Stage-16 depends on stages: Stage-2 - Stage-19 depends on stages: Stage-2 - Stage-22 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -3471,21 +3456,6 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator - Stage: Stage-8 - Stats-Aggr Operator - - Stage: Stage-11 - Stats-Aggr Operator - - Stage: Stage-14 - Stats-Aggr Operator - - Stage: Stage-17 - Stats-Aggr Operator - - Stage: Stage-20 - Stats-Aggr Operator - Stage: Stage-1 Move Operator tables: @@ -3499,21 +3469,6 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator - Stage: Stage-9 - Stats-Aggr Operator - - Stage: Stage-12 - Stats-Aggr Operator - - Stage: Stage-15 - Stats-Aggr Operator - - Stage: Stage-18 - Stats-Aggr Operator - - Stage: Stage-21 - Stats-Aggr Operator - Stage: Stage-2 Move Operator tables: @@ -3527,21 +3482,6 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-10 - Stats-Aggr Operator - - Stage: Stage-13 - 
Stats-Aggr Operator - - Stage: Stage-16 - Stats-Aggr Operator - - Stage: Stage-19 - Stats-Aggr Operator - - Stage: Stage-22 - Stats-Aggr Operator - PREHOOK: query: explain FROM ( diff --git a/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out index 83c6c82..b7afeed 100644 --- a/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out @@ -49,7 +49,6 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -123,9 +122,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Stats-Aggr Operator - PREHOOK: query: insert into table partunion1 partition(part1) select temps.* from ( select 1 as id1, '2014' as part1 from dummy diff --git a/ql/src/test/results/clientpositive/tez/union4.q.out b/ql/src/test/results/clientpositive/tez/union4.q.out index 5a6ab81..2eaf71d 100644 --- a/ql/src/test/results/clientpositive/tez/union4.q.out +++ b/ql/src/test/results/clientpositive/tez/union4.q.out @@ -33,7 +33,6 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -141,9 +140,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Stats-Aggr Operator - PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/tez/union6.q.out b/ql/src/test/results/clientpositive/tez/union6.q.out index a103eb0..f8a38cc 100644 --- a/ql/src/test/results/clientpositive/tez/union6.q.out +++ b/ql/src/test/results/clientpositive/tez/union6.q.out @@ -31,7 +31,6 @@ STAGE 
DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -113,9 +112,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Stats-Aggr Operator - PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out index efe9336..c2fb461 100644 --- a/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out +++ b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out @@ -178,8 +178,8 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} numFiles 3 - numRows 0 - rawDataSize 0 + numRows 15 + rawDataSize 3483 totalSize 4003 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/tez/union_stats.q.out b/ql/src/test/results/clientpositive/tez/union_stats.q.out new file mode 100644 index 0000000..e701209 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/union_stats.q.out @@ -0,0 +1,480 @@ +PREHOOK: query: explain extended create table t as select * from src union all select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain extended create table t as select * from src union all select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 3 <- Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns key,value + columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
+ properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns key,value + columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Union 2 + Vertex: Union 2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here 
#### + +PREHOOK: query: create table t as select * from src union all select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t as select * from src union all select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(1) from t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +1000 +PREHOOK: query: desc formatted t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t +POSTHOOK: query: desc formatted t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: create table tt as select * from t union all select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Input: default@t +PREHOOK: 
Output: database:default +PREHOOK: Output: default@tt +POSTHOOK: query: create table tt as select * from t union all select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Input: default@t +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tt +POSTHOOK: Lineage: tt.key EXPRESSION [(t)t.FieldSchema(name:key, type:string, comment:null), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tt.value EXPRESSION [(t)t.FieldSchema(name:value, type:string, comment:null), (src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted tt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tt +POSTHOOK: query: desc formatted tt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tt +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 1500 + rawDataSize 15936 + totalSize 17436 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table tt +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tt +PREHOOK: Output: default@tt +POSTHOOK: query: drop table tt +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tt +POSTHOOK: Output: default@tt +PREHOOK: query: create table tt as select * from src union all select * from t +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Input: default@t +PREHOOK: Output: database:default +PREHOOK: Output: 
default@tt +POSTHOOK: query: create table tt as select * from src union all select * from t +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Input: default@t +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tt +POSTHOOK: Lineage: tt.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (t)t.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: tt.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (t)t.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: desc formatted tt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tt +POSTHOOK: query: desc formatted tt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tt +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 1500 + rawDataSize 15936 + totalSize 17436 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: create table t1 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: from (select * from src union all select * from src)s +insert overwrite table t1 select * +insert overwrite table t2 select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: from (select * from src union all select * from src)s +insert overwrite table t1 select * +insert overwrite table t2 select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted t1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t1 +POSTHOOK: query: desc formatted t1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t1 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted t2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t2 
+POSTHOOK: query: desc formatted t2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t2 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select count(1) from t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1000 diff --git a/ql/src/test/results/clientpositive/union_stats.q.out b/ql/src/test/results/clientpositive/union_stats.q.out new file mode 100644 index 0000000..ef8d9b5 --- /dev/null +++ b/ql/src/test/results/clientpositive/union_stats.q.out @@ -0,0 +1,548 @@ +PREHOOK: query: explain extended create table t as select * from src union all select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain extended create table t as select * from src union all select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-9 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-9 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map 
Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns key,value + columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns key,value + 
columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [null-subquery1:$hdt$_0-subquery1:src, 
null-subquery2:$hdt$_0-subquery2:src] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-9 + Create Table Operator: + Create Table + columns: key string, value string + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns key,value + columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10004 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns key,value + columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
+ properties: + columns key,value + columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + name: default.t + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns key,value + columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10004 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns key,value + columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns key,value + columns.types string:string + name default.t + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t + name: default.t + Truncated Path -> Alias: +#### A masked pattern was here #### 
+ + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create table t as select * from src union all select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t as select * from src union all select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(1) from t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +1000 +PREHOOK: query: desc formatted t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t +POSTHOOK: query: desc formatted t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: create table tt as select * from t union all select * from src 
+PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Input: default@t +PREHOOK: Output: database:default +PREHOOK: Output: default@tt +POSTHOOK: query: create table tt as select * from t union all select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Input: default@t +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tt +POSTHOOK: Lineage: tt.key EXPRESSION [(t)t.FieldSchema(name:key, type:string, comment:null), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tt.value EXPRESSION [(t)t.FieldSchema(name:value, type:string, comment:null), (src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted tt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tt +POSTHOOK: query: desc formatted tt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tt +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1500 + rawDataSize 15936 + totalSize 17436 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table tt +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tt +PREHOOK: Output: default@tt +POSTHOOK: query: drop table tt +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tt +POSTHOOK: Output: default@tt +PREHOOK: query: create table tt as select * from src union all select * from t +PREHOOK: type: CREATETABLE_AS_SELECT 
+PREHOOK: Input: default@src +PREHOOK: Input: default@t +PREHOOK: Output: database:default +PREHOOK: Output: default@tt +POSTHOOK: query: create table tt as select * from src union all select * from t +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Input: default@t +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tt +POSTHOOK: Lineage: tt.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (t)t.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: tt.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (t)t.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: desc formatted tt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tt +POSTHOOK: query: desc formatted tt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tt +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1500 + rawDataSize 15936 + totalSize 17436 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: create table t1 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@t2 +POSTHOOK: query: create table t2 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: from (select * from src union all select * from src)s +insert overwrite table t1 select * +insert overwrite table t2 select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: from (select * from src union all select * from src)s +insert overwrite table t1 select * +insert overwrite table t2 select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted t1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t1 +POSTHOOK: query: desc formatted t1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t1 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + 
serialization.format 1 +PREHOOK: query: desc formatted t2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t2 +POSTHOOK: query: desc formatted t2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t2 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select count(1) from t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1000