diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index ed99be59c2..d28fa672db 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -601,6 +601,7 @@ minillaplocal.query.files=\ mrr.q,\ multiMapJoin1.q,\ multiMapJoin2.q,\ + murmur_hash_migration.q,\ non_native_window_udf.q,\ optimize_join_ptp.q,\ orc_analyze.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 7ba313707a..205e7266b2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -161,6 +161,9 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, runStatsAnnotation(procCtx); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Setup stats in the operator plan"); + // Update bucketing version of ReduceSinkOp if needed + updateBucketingVersionForUpgrade(procCtx); + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); // run the optimizations that use stats for optimization runStatsDependentOptimizations(procCtx, inputs, outputs); @@ -1476,4 +1479,43 @@ private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) } } } + + private void updateBucketingVersionForUpgrade(OptimizeTezProcContext procCtx) { + // Fetch all the FileSinkOperators. + Set fsOpsAll = new HashSet<>(); + for (TableScanOperator ts : procCtx.parseContext.getTopOps().values()) { + Set fsOps = OperatorUtils.findOperators( + ts, FileSinkOperator.class); + fsOpsAll.addAll(fsOps); + } + + + for (FileSinkOperator fsOp : fsOpsAll) { + Operator parentOfFS = fsOp.getParentOperators().get(0); + if (parentOfFS instanceof GroupByOperator) { + GroupByOperator gbyOp = (GroupByOperator) parentOfFS; + List aggs = gbyOp.getConf().getAggregatorStrings(); + boolean compute_stats = false; + for (String agg : aggs) { + if (agg.equalsIgnoreCase("compute_stats")) { + compute_stats = true; + break; + } + } + if (compute_stats) { + continue; + } + } + + // Not compute_stats + Set rsOps = OperatorUtils.findOperatorsUpstream(parentOfFS, ReduceSinkOperator.class); + if (rsOps.isEmpty()) { + continue; + } + // Skip setting if the bucketing version is not set in FileSinkOp. + if (fsOp.getConf().getTableInfo().isSetBucketingVersion()) { + rsOps.iterator().next().setBucketingVersion(fsOp.getConf().getTableInfo().getBucketingVersion()); + } + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java index 4068e5670f..5de77c6090 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java @@ -184,6 +184,9 @@ public boolean isNonNative() { return (properties.getProperty(hive_metastoreConstants.META_TABLE_STORAGE) != null); } + public boolean isSetBucketingVersion() { + return properties.getProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION) != null; + } public int getBucketingVersion() { return Utilities.getBucketingVersion( properties.getProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION)); diff --git a/ql/src/test/queries/clientpositive/murmur_hash_migration.q b/ql/src/test/queries/clientpositive/murmur_hash_migration.q new file mode 100644 index 0000000000..2b8da9f683 --- /dev/null +++ b/ql/src/test/queries/clientpositive/murmur_hash_migration.q @@ -0,0 +1,61 @@ +--! qt:dataset:src +set hive.stats.column.autogather=false; +set hive.strict.checks.bucketing=false; + +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=30000; + +CREATE TABLE srcbucket_mapjoin_n18_stage(key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1'); +CREATE TABLE srcbucket_mapjoin_part_n20_stage (key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1'); + +CREATE TABLE srcbucket_mapjoin_n18(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1'); +CREATE TABLE srcbucket_mapjoin_part_n20 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1'); + +load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08'); + +load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08'); + +set hive.optimize.bucketingsorting=false; + + +insert overwrite table srcbucket_mapjoin_n18 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_n18_stage limit 150; + +insert overwrite table srcbucket_mapjoin_part_n20 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_part_n20_stage limit 150; + +analyze table srcbucket_mapjoin_n18 compute statistics for columns; +analyze table srcbucket_mapjoin_part_n20 compute statistics for columns; + + +CREATE TABLE tab_part_n11 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +explain +insert overwrite table tab_part_n11 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_part_n20; +insert overwrite table tab_part_n11 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_part_n20; + +CREATE TABLE tab_n10(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +explain +insert overwrite table tab_n10 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_n18; +insert overwrite table tab_n10 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_n18; + +analyze table tab_part_n11 compute statistics for columns; +analyze table tab_n10 compute statistics for columns; + +explain +select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value; +select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value; + +explain +select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value; +select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value; diff --git a/ql/src/test/results/clientpositive/llap/murmur_hash_migration.q.out b/ql/src/test/results/clientpositive/llap/murmur_hash_migration.q.out new file mode 100644 index 0000000000..be5b5f7c94 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/murmur_hash_migration.q.out @@ -0,0 +1,618 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_n18_stage(key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_n18_stage +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_n18_stage(key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_n18_stage +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n20_stage (key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n20_stage (key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_n18(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_n18 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_n18(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_n18 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n20 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_n20 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n20 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20 +PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_n18_stage +POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_n18_stage +POSTHOOK: Output: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n18_stage partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage +POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n20_stage partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08 +PREHOOK: query: insert overwrite table srcbucket_mapjoin_n18 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_n18_stage limit 150 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_n18_stage +PREHOOK: Input: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08 +PREHOOK: Output: default@srcbucket_mapjoin_n18@ds=2008-04-08 +POSTHOOK: query: insert overwrite table srcbucket_mapjoin_n18 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_n18_stage limit 150 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_n18_stage +POSTHOOK: Input: default@srcbucket_mapjoin_n18_stage@ds=2008-04-08 +POSTHOOK: Output: default@srcbucket_mapjoin_n18@ds=2008-04-08 +POSTHOOK: Lineage: srcbucket_mapjoin_n18 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_n18_stage)srcbucket_mapjoin_n18_stage.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: srcbucket_mapjoin_n18 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_n18_stage)srcbucket_mapjoin_n18_stage.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table srcbucket_mapjoin_part_n20 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_part_n20_stage limit 150 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_n20_stage +PREHOOK: Input: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08 +PREHOOK: Output: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +POSTHOOK: query: insert overwrite table srcbucket_mapjoin_part_n20 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_part_n20_stage limit 150 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_n20_stage +POSTHOOK: Input: default@srcbucket_mapjoin_part_n20_stage@ds=2008-04-08 +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +POSTHOOK: Lineage: srcbucket_mapjoin_part_n20 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part_n20_stage)srcbucket_mapjoin_part_n20_stage.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_n20 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part_n20_stage)srcbucket_mapjoin_part_n20_stage.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: analyze table srcbucket_mapjoin_n18 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@srcbucket_mapjoin_n18 +PREHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08 +PREHOOK: Output: default@srcbucket_mapjoin_n18 +PREHOOK: Output: default@srcbucket_mapjoin_n18@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcbucket_mapjoin_n18 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@srcbucket_mapjoin_n18 +POSTHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08 +POSTHOOK: Output: default@srcbucket_mapjoin_n18 +POSTHOOK: Output: default@srcbucket_mapjoin_n18@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: analyze table srcbucket_mapjoin_part_n20 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@srcbucket_mapjoin_part_n20 +PREHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +PREHOOK: Output: default@srcbucket_mapjoin_part_n20 +PREHOOK: Output: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcbucket_mapjoin_part_n20 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@srcbucket_mapjoin_part_n20 +POSTHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20 +POSTHOOK: Output: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE tab_part_n11 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_part_n11 +POSTHOOK: query: CREATE TABLE tab_part_n11 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part_n11 +PREHOOK: query: explain +insert overwrite table tab_part_n11 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_part_n20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table tab_part_n11 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_part_n20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin_part_n20 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tab_part_n11 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tab_part_n11 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table tab_part_n11 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_part_n20 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_n20 +PREHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +PREHOOK: Output: default@tab_part_n11@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab_part_n11 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_part_n20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_n20 +POSTHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +POSTHOOK: Output: default@tab_part_n11@ds=2008-04-08 +POSTHOOK: Lineage: tab_part_n11 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part_n20)srcbucket_mapjoin_part_n20.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part_n11 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part_n20)srcbucket_mapjoin_part_n20.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE tab_n10(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_n10 +POSTHOOK: query: CREATE TABLE tab_n10(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_n10 +PREHOOK: query: explain +insert overwrite table tab_n10 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_n18 +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table tab_n10 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_n18 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin_n18 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tab_n10 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tab_n10 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table tab_n10 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_n18 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_n18 +PREHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08 +PREHOOK: Output: default@tab_n10@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab_n10 partition (ds='2008-04-08') + select key,value from srcbucket_mapjoin_n18 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_n18 +POSTHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08 +POSTHOOK: Output: default@tab_n10@ds=2008-04-08 +POSTHOOK: Lineage: tab_n10 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_n18)srcbucket_mapjoin_n18.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_n10 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_n18)srcbucket_mapjoin_n18.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: analyze table tab_part_n11 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@tab_part_n11 +PREHOOK: Input: default@tab_part_n11@ds=2008-04-08 +PREHOOK: Output: default@tab_part_n11 +PREHOOK: Output: default@tab_part_n11@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table tab_part_n11 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@tab_part_n11 +POSTHOOK: Input: default@tab_part_n11@ds=2008-04-08 +POSTHOOK: Output: default@tab_part_n11 +POSTHOOK: Output: default@tab_part_n11@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: analyze table tab_n10 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@tab_n10 +PREHOOK: Input: default@tab_n10@ds=2008-04-08 +PREHOOK: Output: default@tab_n10 +PREHOOK: Output: default@tab_n10@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table tab_n10 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@tab_n10 +POSTHOOK: Input: default@tab_n10@ds=2008-04-08 +POSTHOOK: Output: default@tab_n10 +POSTHOOK: Output: default@tab_n10@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: explain +select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_n18 +PREHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_n20 +PREHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select t1.key, t1.value, t2.key, t2.value from srcbucket_mapjoin_n18 t1, srcbucket_mapjoin_part_n20 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_n18 +POSTHOOK: Input: default@srcbucket_mapjoin_n18@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_n20 +POSTHOOK: Input: default@srcbucket_mapjoin_part_n20@ds=2008-04-08 +#### A masked pattern was here #### +82 val_82 82 val_82 +86 val_86 86 val_86 +145 val_145 145 val_145 +152 val_152 152 val_152 +152 val_152 152 val_152 +219 val_219 219 val_219 +219 val_219 219 val_219 +255 val_255 255 val_255 +255 val_255 255 val_255 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +446 val_446 446 val_446 +PREHOOK: query: explain +select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 150 Data size: 14250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 220 Data size: 41800 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@tab_n10 +PREHOOK: Input: default@tab_n10@ds=2008-04-08 +PREHOOK: Input: default@tab_part_n11 +PREHOOK: Input: default@tab_part_n11@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select t1.key, t1.value, t2.key, t2.value from tab_part_n11 t1, tab_n10 t2 where t1.key = t2.key order by t1.key, t1.value, t2.key, t2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab_n10 +POSTHOOK: Input: default@tab_n10@ds=2008-04-08 +POSTHOOK: Input: default@tab_part_n11 +POSTHOOK: Input: default@tab_part_n11@ds=2008-04-08 +#### A masked pattern was here #### +82 val_82 82 val_82 +86 val_86 86 val_86 +145 val_145 145 val_145 +152 val_152 152 val_152 +152 val_152 152 val_152 +219 val_219 219 val_219 +219 val_219 219 val_219 +255 val_255 255 val_255 +255 val_255 255 val_255 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +446 val_446 446 val_446