Index: ql/src/test/results/clientpositive/query_properties.q.out =================================================================== --- ql/src/test/results/clientpositive/query_properties.q.out (revision 0) +++ ql/src/test/results/clientpositive/query_properties.q.out (revision 0) @@ -0,0 +1,143 @@ +PREHOOK: query: select * from src a join src b on a.key = b.key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-05-36_498_5805697535865823342/-mr-10000 +Has Join: true +Has Group By: false +Has Sort By: false +Has Order By: false +Has Group By After Join: false +Uses Script: false +Has Distribute By: false +Has Cluster By: false +0 val_0 0 val_0 +PREHOOK: query: select * from src group by src.key, src.value limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-05-43_789_6053740454386954709/-mr-10000 +Has Join: false +Has Group By: true +Has Sort By: false +Has Order By: false +Has Group By After Join: false +Uses Script: false +Has Distribute By: false +Has Cluster By: false +0 val_0 +PREHOOK: query: select * from src order by src.key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-05-49_494_5633179368399155497/-mr-10000 +Has Join: false +Has Group By: false +Has Sort By: false +Has Order By: true +Has Group By After Join: false +Uses Script: false +Has Distribute By: false +Has Cluster By: false +0 val_0 +PREHOOK: query: select * from src sort by src.key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-05-54_060_1716409037097699334/-mr-10000 +Has Join: false +Has Group By: false +Has Sort By: true +Has Order By: false 
+Has Group By After Join: false +Uses Script: false +Has Distribute By: false +Has Cluster By: false +0 val_0 +PREHOOK: query: select a.key, sum(b.value) from src a join src b on a.key = b.key group by a.key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-06-03_249_6504006179961788750/-mr-10000 +Has Join: true +Has Group By: true +Has Sort By: false +Has Order By: false +Has Group By After Join: true +Uses Script: false +Has Distribute By: false +Has Cluster By: false +0 0.0 +PREHOOK: query: select transform(*) using 'cat' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-06-12_911_7465302042210333387/-mr-10000 +Has Join: false +Has Group By: false +Has Sort By: false +Has Order By: false +Has Group By After Join: false +Uses Script: true +Has Distribute By: false +Has Cluster By: false +238 val_238 +PREHOOK: query: select * from src distribute by src.key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-06-16_972_9071341919141171271/-mr-10000 +Has Join: false +Has Group By: false +Has Sort By: false +Has Order By: false +Has Group By After Join: false +Uses Script: false +Has Distribute By: true +Has Cluster By: false +238 val_238 +PREHOOK: query: select * from src cluster by src.key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-06-22_255_3542540661923174882/-mr-10000 +Has Join: false +Has Group By: false +Has Sort By: false +Has Order By: false +Has Group By After Join: false +Uses Script: false +Has Distribute By: false +Has Cluster By: true +0 val_0 +PREHOOK: query: select key, sum(value) from 
(select a.key as key, b.value as value from src a join src b on a.key = b.key) c group by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-06-31_368_2134351032646287700/-mr-10000 +Has Join: true +Has Group By: true +Has Sort By: false +Has Order By: false +Has Group By After Join: false +Uses Script: false +Has Distribute By: false +Has Cluster By: false +0 0.0 +PREHOOK: query: select * from src a join src b on a.key = b.key order by a.key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-06-41_181_4145247126888779334/-mr-10000 +Has Join: true +Has Group By: false +Has Sort By: false +Has Order By: true +Has Group By After Join: false +Uses Script: false +Has Distribute By: false +Has Cluster By: false +0 val_0 0 val_0 +PREHOOK: query: select * from src a join src b on a.key = b.key distribute by a.key sort by a.key, b.value limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-09-19_10-06-50_866_7828522265237898435/-mr-10000 +Has Join: true +Has Group By: false +Has Sort By: true +Has Order By: false +Has Group By After Join: false +Uses Script: false +Has Distribute By: true +Has Cluster By: false +0 val_0 0 val_0 Index: ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckQueryPropertiesHook.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckQueryPropertiesHook.java (revision 0) +++ ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckQueryPropertiesHook.java (revision 0) @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.hooks; + +import org.apache.hadoop.hive.ql.QueryProperties; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; + +/** + * + * CheckQueryPropertiesHook. + * + * This hook prints the values in the QueryProperties object contained in the QueryPlan + * in the HookContext passed to the hook. 
+ */ +public class CheckQueryPropertiesHook implements ExecuteWithHookContext { + + public void run(HookContext hookContext) { + LogHelper console = SessionState.getConsole(); + + if (console == null) { + return; + } + + QueryProperties queryProps = hookContext.getQueryPlan().getQueryProperties(); + + if (queryProps != null) { + console.printError("Has Join: " + queryProps.hasJoin()); + console.printError("Has Group By: " + queryProps.hasGroupBy()); + console.printError("Has Sort By: " + queryProps.hasSortBy()); + console.printError("Has Order By: " + queryProps.hasOrderBy()); + console.printError("Has Group By After Join: " + queryProps.hasJoinFollowedByGroupBy()); + console.printError("Uses Script: " + queryProps.usesScript()); + console.printError("Has Distribute By: " + queryProps.hasDistributeBy()); + console.printError("Has Cluster By: " + queryProps.hasClusterBy()); + } + } +} \ No newline at end of file Index: ql/src/test/queries/clientpositive/query_properties.q =================================================================== --- ql/src/test/queries/clientpositive/query_properties.q (revision 0) +++ ql/src/test/queries/clientpositive/query_properties.q (revision 0) @@ -0,0 +1,14 @@ +set hive.exec.post.hooks = org.apache.hadoop.hive.ql.hooks.CheckQueryPropertiesHook; + +select * from src a join src b on a.key = b.key limit 1; +select * from src group by src.key, src.value limit 1; +select * from src order by src.key limit 1; +select * from src sort by src.key limit 1; +select a.key, sum(b.value) from src a join src b on a.key = b.key group by a.key limit 1; +select transform(*) using 'cat' from src limit 1; +select * from src distribute by src.key limit 1; +select * from src cluster by src.key limit 1; + +select key, sum(value) from (select a.key as key, b.value as value from src a join src b on a.key = b.key) c group by key limit 1; +select * from src a join src b on a.key = b.key order by a.key limit 1; +select * from src a join src b on a.key = b.key 
distribute by a.key sort by a.key, b.value limit 1; \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (revision 1170719) +++ ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (working copy) @@ -88,6 +88,8 @@ private HashSet done; private HashSet started; + private QueryProperties queryProperties; + public QueryPlan() { } @@ -110,6 +112,7 @@ counters = new HashMap>(); done = new HashSet(); started = new HashSet(); + queryProperties = sem.getQueryProperties(); } public String getQueryStr() { @@ -737,4 +740,8 @@ public void setLineageInfo(LineageInfo linfo) { this.linfo = linfo; } + + public QueryProperties getQueryProperties() { + return queryProperties; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java (revision 0) @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql; + +/** + * + * QueryProperties. + * + * A structure to contain features of a query that are determined + * during parsing and may be useful for categorizing a query type + * + * These include whether the query contains: + * a join clause, a group by clause, an order by clause, a sort by clause, + * a distribute by clause, a cluster by clause, a group by clause following + * a join clause, and whether the query uses a script for mapping/reducing + */ +public class QueryProperties { + + boolean hasJoin = false; + boolean hasGroupBy = false; + boolean hasOrderBy = false; + boolean hasSortBy = false; + boolean hasJoinFollowedByGroupBy = false; + + // does the query have a using clause + boolean usesScript = false; + + boolean hasDistributeBy = false; + boolean hasClusterBy = false; + + public boolean hasJoin() { + return hasJoin; + } + + public void setHasJoin(boolean hasJoin) { + this.hasJoin = hasJoin; + } + + public boolean hasGroupBy() { + return hasGroupBy; + } + + public void setHasGroupBy(boolean hasGroupBy) { + this.hasGroupBy = hasGroupBy; + } + + public boolean hasOrderBy() { + return hasOrderBy; + } + + public void setHasOrderBy(boolean hasOrderBy) { + this.hasOrderBy = hasOrderBy; + } + + public boolean hasSortBy() { + return hasSortBy; + } + + public void setHasSortBy(boolean hasSortBy) { + this.hasSortBy = hasSortBy; + } + + public boolean hasJoinFollowedByGroupBy() { + return hasJoinFollowedByGroupBy; + } + + public void setHasJoinFollowedByGroupBy(boolean hasJoinFollowedByGroupBy) { + this.hasJoinFollowedByGroupBy = hasJoinFollowedByGroupBy; + } + + public boolean usesScript() { + return usesScript; + } + + public void setUsesScript(boolean usesScript) { + this.usesScript = usesScript; + } + + public boolean hasDistributeBy() { + return hasDistributeBy; + } + + public void setHasDistributeBy(boolean hasDistributeBy) { + this.hasDistributeBy = hasDistributeBy; + } + + public boolean hasClusterBy() { + return hasClusterBy; + } + + public void 
setHasClusterBy(boolean hasClusterBy) { + this.hasClusterBy = hasClusterBy; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (revision 1170719) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (working copy) @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -71,6 +72,7 @@ protected Context ctx; protected HashMap idToTableNameMap; + protected QueryProperties queryProperties; public static int HIVE_COLUMN_ORDER_ASC = 1; public static int HIVE_COLUMN_ORDER_DESC = 0; @@ -762,4 +764,8 @@ public Hive getDb() { return db; } + + public QueryProperties getQueryProperties() { + return queryProperties; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1170719) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -27,9 +27,9 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; -import java.util.Map.Entry; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; 
import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ConditionalTask; @@ -92,6 +93,7 @@ import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1; import org.apache.hadoop.hive.ql.optimizer.GenMROperator; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3; @@ -101,7 +103,6 @@ import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; import org.apache.hadoop.hive.ql.optimizer.Optimizer; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; @@ -122,6 +123,7 @@ import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; @@ -144,13 +146,12 @@ import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.UDTFDesc; import org.apache.hadoop.hive.ql.plan.UnionDesc; -import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.ResourceType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; @@ -158,9 +159,9 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -279,6 +280,7 @@ HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL); autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME); + queryProperties = new QueryProperties(); } @Override @@ -759,6 +761,7 @@ } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) { processLateralView(qb, frm); } else if (isJoinToken(frm)) { + queryProperties.setHasJoin(true); processJoin(qb, frm); qbp.setJoinExpr(frm); } @@ -767,6 +770,7 @@ case HiveParser.TOK_CLUSTERBY: // Get the clusterby aliases - these are aliased to the entries in the // select list + queryProperties.setHasClusterBy(true); qbp.setClusterByExprForClause(ctx_1.dest, ast); break; @@ -774,6 +778,7 @@ // Get the distribute by aliases - these are aliased to the entries in // the // select list + queryProperties.setHasDistributeBy(true); qbp.setDistributeByExprForClause(ctx_1.dest, ast); if (qbp.getClusterByForClause(ctx_1.dest) != null) { throw new 
SemanticException(generateErrorMessage(ast, @@ -787,6 +792,7 @@ case HiveParser.TOK_SORTBY: // Get the sort by aliases - these are aliased to the entries in the // select list + queryProperties.setHasSortBy(true); qbp.setSortByExprForClause(ctx_1.dest, ast); if (qbp.getClusterByForClause(ctx_1.dest) != null) { throw new SemanticException(generateErrorMessage(ast, @@ -801,6 +807,7 @@ case HiveParser.TOK_ORDERBY: // Get the order by aliases - these are aliased to the entries in the // select list + queryProperties.setHasOrderBy(true); qbp.setOrderByExprForClause(ctx_1.dest, ast); if (qbp.getClusterByForClause(ctx_1.dest) != null) { throw new SemanticException(generateErrorMessage(ast, @@ -811,6 +818,10 @@ case HiveParser.TOK_GROUPBY: // Get the groupby aliases - these are aliased to the entries in the // select list + queryProperties.setHasGroupBy(true); + if (qbp.getJoinExpr() != null) { + queryProperties.setHasJoinFollowedByGroupBy(true); + } if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) { throw new SemanticException(generateErrorMessage(ast, ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg())); @@ -2066,6 +2077,7 @@ boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM); if (isInTransform) { + queryProperties.setUsesScript(true); globalLimitCtx.setHasTransformOrUDTF(true); trfm = (ASTNode) selExprList.getChild(posn).getChild(0); }