From 1d70c44a63a8596d0c8e948d35e67759fe7a0f18 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Tue, 5 Jan 2016 18:01:41 -0800 Subject: [PATCH] HIVE-12786 : CBO may fail for recoverable errors --- .../test/resources/testconfiguration.properties | 3 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 14 ++-- ql/src/test/queries/clientpositive/windowing_gby.q | 6 ++ .../results/clientpositive/tez/windowing_gby.q.out | 98 ++++++++++++++++++++++ 4 files changed, 114 insertions(+), 7 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/windowing_gby.q create mode 100644 ql/src/test/results/clientpositive/tez/windowing_gby.q.out diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 2d8117f..a2ccfe0 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -421,7 +421,8 @@ minitez.query.files=bucket_map_join_tez1.q,\ vectorized_dynamic_partition_pruning.q,\ tez_multi_union.q,\ tez_join.q,\ - tez_union_multiinsert.q + tez_union_multiinsert.q,\ + windowing_gby.q diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 21423c1..63bf8f2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -325,7 +325,9 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept || e instanceof CalciteSemanticException) { reAnalyzeAST = true; } else if (e instanceof SemanticException) { - throw (SemanticException) e; + // although, its likely to be a valid exception, we will retry + // with cbo off anyway. + reAnalyzeAST = true; } else if (e instanceof RuntimeException) { throw (RuntimeException) e; } else { @@ -901,26 +903,26 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu list.add(mdProvider.getMetadataProvider()); RelTraitSet desiredTraits = cluster .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY); - + HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); hepPgmBldr.addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class)); hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveRelFactories.HIVE_JOIN_FACTORY, HiveRelFactories.HIVE_PROJECT_FACTORY, HiveRelFactories.HIVE_FILTER_FACTORY)); - + HepProgram hepPgm = hepPgmBldr.build(); HepPlanner hepPlanner = new HepPlanner(hepPgm); - + hepPlanner.registerMetadataProviders(list); RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); - + RelNode rootRel = calcitePreCboPlan; hepPlanner.setRoot(rootRel); if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) { rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits); } hepPlanner.setRoot(rootRel); - + calciteOptimizedPlan = hepPlanner.findBestExp(); } catch (Exception e) { boolean isMissingStats = noColsMissingStats.get() > 0; diff --git a/ql/src/test/queries/clientpositive/windowing_gby.q b/ql/src/test/queries/clientpositive/windowing_gby.q new file mode 100644 index 0000000..b948f76 --- /dev/null +++ b/ql/src/test/queries/clientpositive/windowing_gby.q @@ -0,0 +1,6 @@ +explain + select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +; diff --git a/ql/src/test/results/clientpositive/tez/windowing_gby.q.out b/ql/src/test/results/clientpositive/tez/windowing_gby.q.out new file mode 100644 index 0000000..e65533f --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/windowing_gby.q.out @@ -0,0 +1,98 @@ +PREHOOK: query: explain + select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +PREHOOK: type: QUERY +POSTHOOK: query: explain + select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_17] + compressed:false + Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + Select Operator [SEL_15] + outputColumnNames:["_col0"] + Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + PTF Operator [PTF_14] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"name:":"windowingtablefunction","order by:":"_col0","partition by:":"0"}] + Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_13] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:0 (type: int), _col0 (type: double) + Map-reduce partition columns:0 (type: int) + sort order:++ + Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_11] + outputColumnNames:["_col0"] + Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_10] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | keys:KEY._col0 (type: boolean) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_9] + key expressions:_col0 (type: boolean) + Map-reduce partition columns:_col0 (type: boolean) + sort order:+ + Statistics:Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: bigint), _col2 (type: bigint) + Group By Operator [GBY_8] + aggregations:["sum(_col10)","sum(_col2)"] + keys:_col4 (type: boolean) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_7] + outputColumnNames:["_col4","_col10","_col2"] + Statistics:Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_22] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"0":"value (type: string)","1":"cstring1 (type: string)"} + | outputColumnNames:["_col2","_col4","_col10"] + | Statistics:Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_3] + | key expressions:value (type: string) + | Map-reduce partition columns:value (type: string) + | sort order:+ + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + | value expressions:c_int (type: int), c_boolean (type: boolean) + | Filter Operator [FIL_20] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:ws + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:cstring1 (type: string) + Map-reduce partition columns:cstring1 (type: string) + sort order:+ + Statistics:Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + value expressions:cint (type: int) + Filter Operator [FIL_21] + predicate:cstring1 is not null (type: boolean) + Statistics:Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_1] + alias:wr + Statistics:Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + -- 1.7.12.4 (Apple Git-37)